!21149 Sync MD code clean to master

Merge pull request !21149 from xiefangqi/md_sync_codeclean_master
This commit is contained in:
i-robot 2021-08-21 06:43:20 +00:00 committed by Gitee
commit 0f3de77e93
130 changed files with 947 additions and 537 deletions

View File

@ -593,14 +593,16 @@ SchemaObj::SchemaObj(const std::vector<char> &schema_file) : data_(std::make_sha
// SchemaObj Init function // SchemaObj Init function
Status SchemaObj::Init() { Status SchemaObj::Init() {
if (!data_->schema_file_.empty()) { if (data_ != nullptr && !data_->schema_file_.empty()) {
Path schema_file(data_->schema_file_); std::string real_path;
RETURN_IF_NOT_OK(Path::RealPath(data_->schema_file_, real_path));
Path schema_file(real_path);
CHECK_FAIL_RETURN_UNEXPECTED(schema_file.Exists(), CHECK_FAIL_RETURN_UNEXPECTED(schema_file.Exists(),
"The file " + data_->schema_file_ + " does not exist or permission denied!"); "The file " + data_->schema_file_ + " does not exist or permission denied!");
nlohmann::json js; nlohmann::json js;
try { try {
std::ifstream in(data_->schema_file_); std::ifstream in(real_path);
in >> js; in >> js;
CHECK_FAIL_RETURN_UNEXPECTED(js.find("columns") != js.end(), CHECK_FAIL_RETURN_UNEXPECTED(js.find("columns") != js.end(),
"\"columns\" node is required in the schema json file."); "\"columns\" node is required in the schema json file.");

View File

@ -27,7 +27,8 @@ Iterator::~Iterator() { Stop(); }
// Get the next row from the data pipeline. // Get the next row from the data pipeline.
Status Iterator::GetNextRowCharIF(MSTensorMapChar *row) { Status Iterator::GetNextRowCharIF(MSTensorMapChar *row) {
// Clean data row RETURN_UNEXPECTED_IF_NULL(row);
// Clean data buffer
row->clear(); row->clear();
std::unordered_map<std::string, std::shared_ptr<dataset::Tensor>> md_map; std::unordered_map<std::string, std::shared_ptr<dataset::Tensor>> md_map;
Status rc = consumer_->GetNextAsMap(&md_map); Status rc = consumer_->GetNextAsMap(&md_map);
@ -47,6 +48,7 @@ Status Iterator::GetNextRowCharIF(MSTensorMapChar *row) {
// Get the next row from the data pipeline. // Get the next row from the data pipeline.
Status Iterator::GetNextRow(MSTensorVec *row) { Status Iterator::GetNextRow(MSTensorVec *row) {
// Clean data row // Clean data row
RETURN_UNEXPECTED_IF_NULL(row);
row->clear(); row->clear();
// create a dataset tensor row and fetch. Then we convert the output to MSTensor // create a dataset tensor row and fetch. Then we convert the output to MSTensor
std::vector<std::shared_ptr<dataset::Tensor>> md_row; std::vector<std::shared_ptr<dataset::Tensor>> md_row;
@ -84,6 +86,7 @@ Status Iterator::BuildAndLaunchTree(std::shared_ptr<Dataset> ds, int32_t num_epo
PullIterator::PullIterator() : pull_consumer_(nullptr) {} PullIterator::PullIterator() : pull_consumer_(nullptr) {}
// Get the next row from the data pipeline. // Get the next row from the data pipeline.
Status PullIterator::GetRows(int32_t num_rows, std::vector<MSTensorVec> *const row) { Status PullIterator::GetRows(int32_t num_rows, std::vector<MSTensorVec> *const row) {
RETURN_UNEXPECTED_IF_NULL(row);
for (int i = 0; i < num_rows; i++) { for (int i = 0; i < num_rows; i++) {
std::vector<std::shared_ptr<dataset::Tensor>> md_row; std::vector<std::shared_ptr<dataset::Tensor>> md_row;
Status rc = pull_consumer_->GetNextAsVector(&md_row); Status rc = pull_consumer_->GetNextAsVector(&md_row);
@ -105,6 +108,7 @@ Status PullIterator::GetRows(int32_t num_rows, std::vector<MSTensorVec> *const r
} }
Status PullIterator::GetNextRow(MSTensorVec *const row) { Status PullIterator::GetNextRow(MSTensorVec *const row) {
RETURN_UNEXPECTED_IF_NULL(row);
CHECK_FAIL_RETURN_UNEXPECTED(pull_consumer_ != nullptr, "Consumer is nullptr."); CHECK_FAIL_RETURN_UNEXPECTED(pull_consumer_ != nullptr, "Consumer is nullptr.");
std::vector<std::shared_ptr<dataset::Tensor>> md_row; std::vector<std::shared_ptr<dataset::Tensor>> md_row;
Status rc = pull_consumer_->GetNextAsVector(&md_row); Status rc = pull_consumer_->GetNextAsVector(&md_row);

View File

@ -107,6 +107,7 @@ Status ConfigManager::LoadFile(const std::string &settingsFile) {
nlohmann::json js; nlohmann::json js;
in >> js; in >> js;
rc = FromJson(js); rc = FromJson(js);
in.close();
} catch (const nlohmann::json::type_error &e) { } catch (const nlohmann::json::type_error &e) {
std::ostringstream ss; std::ostringstream ss;
ss << "Client file failed to load:\n" << e.what(); ss << "Client file failed to load:\n" << e.what();

View File

@ -29,8 +29,10 @@ CVTensor::CVTensor(std::shared_ptr<Tensor> tensor) : Tensor(std::move(*tensor))
} }
Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPtr *out) { Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPtr *out) {
RETURN_UNEXPECTED_IF_NULL(out);
const CVTensorAlloc *alloc = GlobalContext::Instance()->cv_tensor_allocator(); const CVTensorAlloc *alloc = GlobalContext::Instance()->cv_tensor_allocator();
*out = std::allocate_shared<CVTensor>(*alloc, shape, type); *out = std::allocate_shared<CVTensor>(*alloc, shape, type);
RETURN_UNEXPECTED_IF_NULL(out);
int64_t byte_size = (*out)->SizeInBytes(); int64_t byte_size = (*out)->SizeInBytes();
// Don't allocate if we have a tensor with no elements. // Don't allocate if we have a tensor with no elements.
if (byte_size != 0) { if (byte_size != 0) {
@ -41,6 +43,7 @@ Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPt
} }
Status CVTensor::CreateFromMat(const cv::Mat &mat, const dsize_t rank, CVTensorPtr *out) { Status CVTensor::CreateFromMat(const cv::Mat &mat, const dsize_t rank, CVTensorPtr *out) {
RETURN_UNEXPECTED_IF_NULL(out);
TensorPtr out_tensor; TensorPtr out_tensor;
cv::Mat mat_local = mat; cv::Mat mat_local = mat;
// if the input Mat's memory is not continuous, copy it to one block of memory // if the input Mat's memory is not continuous, copy it to one block of memory
@ -78,6 +81,9 @@ std::pair<std::array<int, 2>, int> CVTensor::IsValidImage(const TensorShape &sha
} }
std::shared_ptr<CVTensor> CVTensor::AsCVTensor(std::shared_ptr<Tensor> t) { std::shared_ptr<CVTensor> CVTensor::AsCVTensor(std::shared_ptr<Tensor> t) {
if (t == nullptr) {
return nullptr;
}
std::shared_ptr<CVTensor> cv_t = std::dynamic_pointer_cast<CVTensor>(t); std::shared_ptr<CVTensor> cv_t = std::dynamic_pointer_cast<CVTensor>(t);
if (cv_t != nullptr) { if (cv_t != nullptr) {
return cv_t; return cv_t;
@ -88,13 +94,13 @@ std::shared_ptr<CVTensor> CVTensor::AsCVTensor(std::shared_ptr<Tensor> t) {
} }
Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType &type, cv::Mat *mat) { Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType &type, cv::Mat *mat) {
std::pair<std::array<int, 2>, int> cv_shape_type = IsValidImage(shape, type); RETURN_UNEXPECTED_IF_NULL(data);
RETURN_UNEXPECTED_IF_NULL(mat);
const int kShapeAsDefault = 2;
std::pair<std::array<int, kShapeAsDefault>, int> cv_shape_type = IsValidImage(shape, type);
if (cv_shape_type.second == -1) { if (cv_shape_type.second == -1) {
std::vector<dsize_t> sizes = shape.AsVector(); std::vector<dsize_t> sizes = shape.AsVector();
std::vector<int> sizes32(sizes.begin(), sizes.end()); // convert long to int for usage with OpenCV std::vector<int> sizes32(sizes.begin(), sizes.end()); // convert long to int for usage with OpenCV
if (static_cast<int>(shape.Rank()) != shape.Rank()) {
RETURN_STATUS_UNEXPECTED("Error in creating CV mat. Wrong shape.");
}
uint8_t cv_type = type.AsCVType(); uint8_t cv_type = type.AsCVType();
if (cv_type == kCVInvalidType) { if (cv_type == kCVInvalidType) {
@ -102,7 +108,7 @@ Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType &
} }
*mat = cv::Mat(static_cast<int>(shape.Rank()), &sizes32[0], cv_type, data); *mat = cv::Mat(static_cast<int>(shape.Rank()), &sizes32[0], cv_type, data);
} else { } else {
*mat = cv::Mat(2, &(cv_shape_type.first[0]), cv_shape_type.second, data); *mat = cv::Mat(kShapeAsDefault, &(cv_shape_type.first[0]), cv_shape_type.second, data);
} }
return Status::OK(); return Status::OK();
} }
@ -121,10 +127,14 @@ Status CVTensor::ExpandDim(const dsize_t &axis) {
void CVTensor::Squeeze() { void CVTensor::Squeeze() {
Tensor::Squeeze(); Tensor::Squeeze();
(void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_); Status rc = this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
if (rc.IsError()) {
MS_LOG(ERROR) << "Squeeze failed, error details is " << rc;
}
} }
Status CVTensor::MatAtIndex(const std::vector<dsize_t> &index, cv::Mat *mat) { Status CVTensor::MatAtIndex(const std::vector<dsize_t> &index, cv::Mat *mat) {
RETURN_UNEXPECTED_IF_NULL(mat);
uchar *start = nullptr; uchar *start = nullptr;
TensorShape remaining({-1}); TensorShape remaining({-1});
RETURN_IF_NOT_OK(this->StartAddrOfIndex(index, &start, &remaining)); RETURN_IF_NOT_OK(this->StartAddrOfIndex(index, &start, &remaining));

View File

@ -143,15 +143,15 @@ class DataType {
constexpr bool operator!=(const Type a) const { return type_ != a; } constexpr bool operator!=(const Type a) const { return type_ != a; }
// Disable this usage `if(d)` where d is of type DataType // Disable this usage `if(d)` where d is of type DataType
/// \return /// \return return nothing since we disable this function.
operator bool() = delete; operator bool() = delete;
// To be used in Switch/case // To be used in Switch/case
/// \return /// \return data type internal.
operator Type() const { return type_; } operator Type() const { return type_; }
// The number of bytes needed to store one value of this type // The number of bytes needed to store one value of this type
/// \return /// \return the number of bytes of the type.
uint8_t SizeInBytes() const; uint8_t SizeInBytes() const;
#ifndef ENABLE_ANDROID #ifndef ENABLE_ANDROID

View File

@ -41,15 +41,17 @@ DETensor::DETensor(std::shared_ptr<dataset::DeviceTensor> device_tensor_impl, bo
: device_tensor_impl_(device_tensor_impl), name_("MindDataDeviceTensor"), is_device_(is_device) { : device_tensor_impl_(device_tensor_impl), name_("MindDataDeviceTensor"), is_device_(is_device) {
// The sequence of shape_ is (width, widthStride, height, heightStride) in Dvpp module // The sequence of shape_ is (width, widthStride, height, heightStride) in Dvpp module
// We need to add [1]widthStride and [3]heightStride, which are actual YUV image shape, into shape_ attribute // We need to add [1]widthStride and [3]heightStride, which are actual YUV image shape, into shape_ attribute
uint8_t flag = 0; if (device_tensor_impl && device_tensor_impl->GetYuvStrideShape().size() > 0) {
for (auto &i : device_tensor_impl->GetYuvStrideShape()) { uint8_t flag = 0;
if (flag % 2 == 1) { for (auto &i : device_tensor_impl->GetYuvStrideShape()) {
int64_t j = static_cast<int64_t>(i); if (flag % 2 == 1) {
shape_.emplace_back(j); int64_t j = static_cast<int64_t>(i);
shape_.emplace_back(j);
}
++flag;
} }
++flag; std::reverse(shape_.begin(), shape_.end());
} }
std::reverse(shape_.begin(), shape_.end());
MS_LOG(INFO) << "This is a YUV420 format image, one pixel takes 1.5 bytes. Therefore, the shape of" MS_LOG(INFO) << "This is a YUV420 format image, one pixel takes 1.5 bytes. Therefore, the shape of"
<< " image is in (H, W) format. You can search for more information about YUV420 format"; << " image is in (H, W) format. You can search for more information about YUV420 format";
} }

View File

@ -23,7 +23,10 @@
namespace mindspore { namespace mindspore {
namespace dataset { namespace dataset {
DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) : Tensor(shape, type) { const int kYuvDefaultChannels = 4;
DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type)
: Tensor(shape, type), device_data_(nullptr), size_(0) {
// grab the mem pool from global context and create the allocator for char data area // grab the mem pool from global context and create the allocator for char data area
std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool(); std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool();
data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool); data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
@ -34,6 +37,7 @@ DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) : Ten
Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type, std::shared_ptr<DeviceTensor> *out) { Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type, std::shared_ptr<DeviceTensor> *out) {
CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape."); CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape.");
CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type."); CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type.");
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Invalid nullptr pointer.");
const DeviceTensorAlloc *alloc = GlobalContext::Instance()->device_tensor_allocator(); const DeviceTensorAlloc *alloc = GlobalContext::Instance()->device_tensor_allocator();
*out = std::allocate_shared<DeviceTensor>(*alloc, shape, type); *out = std::allocate_shared<DeviceTensor>(*alloc, shape, type);
// if it's a string tensor and it has no elements, Just initialize the shape and type. // if it's a string tensor and it has no elements, Just initialize the shape and type.
@ -42,6 +46,7 @@ Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type,
} }
CHECK_FAIL_RETURN_UNEXPECTED(type.IsNumeric(), "Number of elements is not 0. The type should be numeric."); CHECK_FAIL_RETURN_UNEXPECTED(type.IsNumeric(), "Number of elements is not 0. The type should be numeric.");
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory faiiled.");
int64_t bytes = (*out)->SizeInBytes(); int64_t bytes = (*out)->SizeInBytes();
// Don't allocate if we have a tensor with no elements. // Don't allocate if we have a tensor with no elements.
@ -58,9 +63,11 @@ Status DeviceTensor::CreateFromDeviceMemory(const TensorShape &shape, const Data
CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type."); CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type.");
CHECK_FAIL_RETURN_UNEXPECTED(data_ptr != nullptr, "Data pointer is NULL"); CHECK_FAIL_RETURN_UNEXPECTED(data_ptr != nullptr, "Data pointer is NULL");
CHECK_FAIL_RETURN_UNEXPECTED(dataSize > 0, "Invalid data size"); CHECK_FAIL_RETURN_UNEXPECTED(dataSize > 0, "Invalid data size");
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Out pointer is NULL");
const DeviceTensorAlloc *alloc = GlobalContext::Instance()->device_tensor_allocator(); const DeviceTensorAlloc *alloc = GlobalContext::Instance()->device_tensor_allocator();
*out = std::allocate_shared<DeviceTensor>(*alloc, shape, type); *out = std::allocate_shared<DeviceTensor>(*alloc, shape, type);
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
// if it's a string tensor and it has no elements, Just initialize the shape and type. // if it's a string tensor and it has no elements, Just initialize the shape and type.
if (!type.IsNumeric() && shape.NumOfElements() == 0) { if (!type.IsNumeric() && shape.NumOfElements() == 0) {
@ -76,6 +83,8 @@ Status DeviceTensor::CreateFromDeviceMemory(const TensorShape &shape, const Data
RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size)); RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));
} }
CHECK_FAIL_RETURN_UNEXPECTED(attributes.size() >= kYuvDefaultChannels,
"Invalid attributes size, should be greater than 4.");
CHECK_FAIL_RETURN_UNEXPECTED( CHECK_FAIL_RETURN_UNEXPECTED(
(*out)->SetAttributes(data_ptr, dataSize, attributes[0], attributes[1], attributes[2], attributes[3]), (*out)->SetAttributes(data_ptr, dataSize, attributes[0], attributes[1], attributes[2], attributes[3]),
"Fail to set attributes for DeviceTensor"); "Fail to set attributes for DeviceTensor");
@ -129,6 +138,7 @@ Status DeviceTensor::SetSize_(const uint32_t &new_size) {
#ifdef ENABLE_ACL #ifdef ENABLE_ACL
Status DeviceTensor::DataPop_(std::shared_ptr<Tensor> *host_tensor) { Status DeviceTensor::DataPop_(std::shared_ptr<Tensor> *host_tensor) {
CHECK_FAIL_RETURN_UNEXPECTED(host_tensor != nullptr, "host tensor pointer is NULL.");
void *resHostBuf = nullptr; void *resHostBuf = nullptr;
APP_ERROR ret = aclrtMallocHost(&resHostBuf, this->DeviceDataSize()); APP_ERROR ret = aclrtMallocHost(&resHostBuf, this->DeviceDataSize());
if (ret != APP_ERR_OK) { if (ret != APP_ERR_OK) {
@ -151,13 +161,18 @@ Status DeviceTensor::DataPop_(std::shared_ptr<Tensor> *host_tensor) {
mindspore::dataset::dsize_t dvppDataSize = this->DeviceDataSize(); mindspore::dataset::dsize_t dvppDataSize = this->DeviceDataSize();
const mindspore::dataset::TensorShape dvpp_shape({dvppDataSize, 1, 1}); const mindspore::dataset::TensorShape dvpp_shape({dvppDataSize, 1, 1});
CHECK_FAIL_RETURN_UNEXPECTED(this->GetYuvStrideShape().size() >= kYuvDefaultChannels,
"Invalid YuvShape, should greater than 4");
uint32_t _output_width_ = this->GetYuvStrideShape()[0]; uint32_t _output_width_ = this->GetYuvStrideShape()[0];
uint32_t _output_widthStride_ = this->GetYuvStrideShape()[1]; uint32_t _output_widthStride_ = this->GetYuvStrideShape()[1];
uint32_t _output_height_ = this->GetYuvStrideShape()[2]; uint32_t _output_height_ = this->GetYuvStrideShape()[2];
uint32_t _output_heightStride_ = this->GetYuvStrideShape()[3]; uint32_t _output_heightStride_ = this->GetYuvStrideShape()[3];
const mindspore::dataset::DataType dvpp_data_type(mindspore::dataset::DataType::DE_UINT8); const mindspore::dataset::DataType dvpp_data_type(mindspore::dataset::DataType::DE_UINT8);
mindspore::dataset::Tensor::CreateFromMemory(dvpp_shape, dvpp_data_type, ret_ptr, host_tensor); RETURN_IF_NOT_OK(mindspore::dataset::Tensor::CreateFromMemory(dvpp_shape, dvpp_data_type, ret_ptr, host_tensor));
CHECK_FAIL_RETURN_UNEXPECTED(host_tensor != nullptr, "Allocate memory failed.");
(*host_tensor)->SetYuvShape(_output_width_, _output_widthStride_, _output_height_, _output_heightStride_); (*host_tensor)->SetYuvShape(_output_width_, _output_widthStride_, _output_height_, _output_heightStride_);
if (!(*host_tensor)->HasData()) { if (!(*host_tensor)->HasData()) {

View File

@ -39,7 +39,9 @@ struct npy_scalar_caster {
bool load(handle src, bool convert) { bool load(handle src, bool convert) {
// Taken from Eigen casters. Permits either scalar dtype or scalar array. // Taken from Eigen casters. Permits either scalar dtype or scalar array.
handle type = dtype::of<T>().attr("type"); // Could make more efficient. handle type = dtype::of<T>().attr("type"); // Could make more efficient.
if (!convert && !isinstance<Array>(src) && !isinstance(src, type)) return false; if (!convert && !isinstance<Array>(src) && !isinstance(src, type)) {
return false;
}
Array tmp = Array::ensure(src); Array tmp = Array::ensure(src);
if (tmp && tmp.size() == 1 && tmp.ndim() == 0) { if (tmp && tmp.size() == 1 && tmp.ndim() == 0) {

View File

@ -91,8 +91,10 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept {
Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out) { Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out) {
CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape."); CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape.");
CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type."); CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type.");
RETURN_UNEXPECTED_IF_NULL(out);
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, shape, type); *out = std::allocate_shared<Tensor>(*alloc, shape, type);
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
// if it's a string tensor and it has no elements, Just initialize the shape and type. // if it's a string tensor and it has no elements, Just initialize the shape and type.
if (!type.IsNumeric() && shape.NumOfElements() == 0) { if (!type.IsNumeric() && shape.NumOfElements() == 0) {
return Status::OK(); return Status::OK();
@ -110,7 +112,7 @@ Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, Tenso
} }
Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) { Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) {
RETURN_IF_NOT_OK(CreateEmpty(shape, type, out)); RETURN_IF_NOT_OK(CreateEmpty(shape, type, out));
if (src != nullptr) { if (src != nullptr && out != nullptr) {
// Given the shape/type of this tensor, compute the data size and copy in the input bytes. // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
int64_t byte_size = (*out)->SizeInBytes(); int64_t byte_size = (*out)->SizeInBytes();
if (byte_size == 0) { if (byte_size == 0) {
@ -129,9 +131,11 @@ Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type,
Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const unsigned char *src, Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const unsigned char *src,
const dsize_t &length, TensorPtr *out) { const dsize_t &length, TensorPtr *out) {
CHECK_FAIL_RETURN_UNEXPECTED(src != nullptr, "Pointer to source data is null."); RETURN_UNEXPECTED_IF_NULL(src);
RETURN_UNEXPECTED_IF_NULL(out);
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, shape, type); *out = std::allocate_shared<Tensor>(*alloc, shape, type);
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
if (type.IsNumeric()) { if (type.IsNumeric()) {
dsize_t calculated_length = (*out)->SizeInBytes(); dsize_t calculated_length = (*out)->SizeInBytes();
CHECK_FAIL_RETURN_UNEXPECTED(calculated_length == length, "Length of source data does not match the shape."); CHECK_FAIL_RETURN_UNEXPECTED(calculated_length == length, "Length of source data does not match the shape.");
@ -159,6 +163,7 @@ Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type,
#ifdef ENABLE_PYTHON #ifdef ENABLE_PYTHON
Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) { Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
RETURN_UNEXPECTED_IF_NULL(out);
std::vector<dsize_t> shape; std::vector<dsize_t> shape;
for (dsize_t i = 0; i < arr.ndim(); i++) { for (dsize_t i = 0; i < arr.ndim(); i++) {
shape.push_back(static_cast<dsize_t>(arr.shape()[i])); shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
@ -167,9 +172,11 @@ Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
std::vector<std::string> strings; std::vector<std::string> strings;
if (arr.dtype().kind() == 'U') { if (arr.dtype().kind() == 'U') {
std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast<py::str>(s)); }); (void)std::for_each(arr.begin(), arr.end(),
[&strings](const auto &s) { strings.emplace_back(py::cast<py::str>(s)); });
} else { } else {
std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast<py::bytes>(s)); }); (void)std::for_each(arr.begin(), arr.end(),
[&strings](const auto &s) { strings.emplace_back(py::cast<py::bytes>(s)); });
} }
arr.resize(shape); // resize arr back to the original shape arr.resize(shape); // resize arr back to the original shape
@ -178,6 +185,7 @@ Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
} }
Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *out) { Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *out) {
RETURN_UNEXPECTED_IF_NULL(out);
if (DataType::FromNpArray(arr) == DataType::DE_STRING) { if (DataType::FromNpArray(arr) == DataType::DE_STRING) {
return CreateFromNpString(arr, out); return CreateFromNpString(arr, out);
} }
@ -191,7 +199,7 @@ Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *
shape.push_back(static_cast<dsize_t>(arr.shape()[i])); shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
strides.push_back(static_cast<dsize_t>(arr.strides()[i])); strides.push_back(static_cast<dsize_t>(arr.strides()[i]));
// in case of empty array num_items=0 // in case of empty array num_items=0
if (count != 0) { if (count != 0 && shape.size() > i && shape[i] != 0) {
count /= shape[i]; count /= shape[i];
if (strides[i] != arr.itemsize() * count) { if (strides[i] != arr.itemsize() * count) {
is_strided = true; is_strided = true;
@ -213,9 +221,11 @@ Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *
#ifndef ENABLE_ANDROID #ifndef ENABLE_ANDROID
Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out) { Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out) {
RETURN_UNEXPECTED_IF_NULL(out);
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(bytes_list.value_size())}), *out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(bytes_list.value_size())}),
DataType(DataType::DE_STRING)); DataType(DataType::DE_STRING));
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
// total bytes needed = offset array + strings // total bytes needed = offset array + strings
// offset array needs to store one offset var per element + 1 extra to get the length of the last string. // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
// strings will be null-terminated --> need 1 extra byte per element // strings will be null-terminated --> need 1 extra byte per element
@ -236,9 +246,7 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const
num_bytes -= kOffsetSize; num_bytes -= kOffsetSize;
// insert actual string // insert actual string
int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1); int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
if (ret_code != 0) { CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Cannot copy string into Tensor");
MS_LOG(ERROR) << "Cannot copy string into Tensor";
}
// next string will be stored right after the current one. // next string will be stored right after the current one.
offset = offset + str.length() + 1; offset = offset + str.length() + 1;
// total bytes are reduced by the length of the string // total bytes are reduced by the length of the string
@ -257,6 +265,7 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const
#endif #endif
Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *out) { Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *out) {
RETURN_UNEXPECTED_IF_NULL(out);
Path file(path); Path file(path);
if (file.IsDirectory()) { if (file.IsDirectory()) {
RETURN_STATUS_UNEXPECTED("Invalid file found: " + path + ", should be file, but got directory."); RETURN_STATUS_UNEXPECTED("Invalid file found: " + path + ", should be file, but got directory.");
@ -269,8 +278,10 @@ Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *
CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Failed to find size of file, check path: " + path); CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Failed to find size of file, check path: " + path);
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out)); RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out));
int64_t written_bytes = fs.read(reinterpret_cast<char *>((*out)->GetMutableBuffer()), num_bytes).gcount(); int64_t written_bytes = fs.read(reinterpret_cast<char *>((*out)->GetMutableBuffer()), num_bytes).gcount();
CHECK_FAIL_RETURN_UNEXPECTED(written_bytes == num_bytes && fs.good(), if (!(written_bytes == num_bytes && fs.good())) {
"Error in writing to tensor, check path: " + path); fs.close();
RETURN_STATUS_UNEXPECTED("Error in writing to tensor, check path: " + path);
}
fs.close(); fs.close();
return Status::OK(); return Status::OK();
} }
@ -278,8 +289,10 @@ Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *
#ifndef ENABLE_ANDROID #ifndef ENABLE_ANDROID
Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
const DataType &type, dsize_t pad_size, TensorPtr *out) { const DataType &type, dsize_t pad_size, TensorPtr *out) {
RETURN_UNEXPECTED_IF_NULL(out);
RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, type, out)); RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, type, out));
RETURN_UNEXPECTED_IF_NULL(out);
unsigned char *current_tensor_addr = (*out)->GetMutableBuffer(); unsigned char *current_tensor_addr = (*out)->GetMutableBuffer();
int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size; int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size;
@ -313,18 +326,23 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const
// Here we convert array C to array A, by memcpy index by index (Note that not all elements in C is copied) // Here we convert array C to array A, by memcpy index by index (Note that not all elements in C is copied)
Status Tensor::CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape, Status Tensor::CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
std::vector<dsize_t> strides, uint8_t type_size) { std::vector<dsize_t> strides, uint8_t type_size) {
RETURN_UNEXPECTED_IF_NULL(dst);
RETURN_UNEXPECTED_IF_NULL(src);
dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>()); dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>());
for (dsize_t i = 0; i < size; ++i) { for (dsize_t i = 0; i < size; ++i) {
dsize_t offset = 0; dsize_t offset = 0;
dsize_t count = i; dsize_t count = i;
for (size_t j = 0; j < shape.size(); ++j) { for (size_t j = 0; j < shape.size(); ++j) {
// convert 1d array's index to 3d array's index (A -> B) // convert 1d array's index to 3d array's index (A -> B)
CHECK_FAIL_RETURN_UNEXPECTED(shape[shape.size() - 1 - j] != 0, "Invalid data, shape can't be zero.");
dsize_t idx = count % shape[shape.size() - 1 - j]; dsize_t idx = count % shape[shape.size() - 1 - j];
count /= shape[shape.size() - 1 - j]; count /= shape[shape.size() - 1 - j];
// calculate the raw data offset based on strides (B -> C) // calculate the raw data offset based on strides (B -> C)
offset += idx * strides[shape.size() - 1 - j]; offset += idx * strides[shape.size() - 1 - j];
// once count = 0, the following idxes are all zero, skip them // once count = 0, the following idxes are all zero, skip them
if (count == 0) break; if (count == 0) {
break;
}
} }
// strides already consider byte size of the data type, but dst doesn't. // strides already consider byte size of the data type, but dst doesn't.
// dst[i] = dst + i * type_size = src + offset // dst[i] = dst + i * type_size = src + offset
@ -482,6 +500,7 @@ void Tensor::Invalidate() {
template <typename T> template <typename T>
Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const { Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
RETURN_UNEXPECTED_IF_NULL(ptr);
if (type_.IsCompatible<T>()) { if (type_.IsCompatible<T>()) {
if (data_ == nullptr) { if (data_ == nullptr) {
std::string err = "Data is not allocated yet"; std::string err = "Data is not allocated yet";
@ -490,6 +509,7 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
dsize_t flat_idx; dsize_t flat_idx;
RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx)); RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
*ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes()); *ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes());
RETURN_UNEXPECTED_IF_NULL(ptr);
return Status::OK(); return Status::OK();
} else { } else {
@ -499,6 +519,8 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
} }
Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset_t *length) const { Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset_t *length) const {
RETURN_UNEXPECTED_IF_NULL(ptr);
RETURN_UNEXPECTED_IF_NULL(length);
if (type_ == DataType::DE_STRING) { if (type_ == DataType::DE_STRING) {
if (data_ == nullptr) { if (data_ == nullptr) {
std::string err = "Data is not allocated yet"; std::string err = "Data is not allocated yet";
@ -519,6 +541,8 @@ Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset
} }
Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining) { Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining) {
RETURN_UNEXPECTED_IF_NULL(start_addr_of_index);
RETURN_UNEXPECTED_IF_NULL(remaining);
if (type() == DataType::DE_STRING) { if (type() == DataType::DE_STRING) {
RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string tensors yet."); RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string tensors yet.");
} }
@ -541,6 +565,7 @@ Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_
Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor, Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor,
const bool partial_insert) { const bool partial_insert) {
RETURN_UNEXPECTED_IF_NULL(tensor);
std::string err_msg; std::string err_msg;
if (partial_insert) { if (partial_insert) {
err_msg += (ind.size() != 1) err_msg += (ind.size() != 1)
@ -603,13 +628,14 @@ Status Tensor::ExpandDim(const dsize_t &axis) {
std::vector<dsize_t> Tensor::Strides() const { std::vector<dsize_t> Tensor::Strides() const {
std::vector<dsize_t> strides = shape_.Strides(); std::vector<dsize_t> strides = shape_.Strides();
uint8_t size = type_.SizeInBytes(); uint8_t size = type_.SizeInBytes();
std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; }); (void)std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
return strides; return strides;
} }
#ifdef ENABLE_PYTHON #ifdef ENABLE_PYTHON
Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) { Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
RETURN_UNEXPECTED_IF_NULL(t); RETURN_UNEXPECTED_IF_NULL(t);
RETURN_UNEXPECTED_IF_NULL(out);
CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings."); CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");
std::string format_desc = t->type().GetPybindFormat(); std::string format_desc = t->type().GetPybindFormat();
@ -622,6 +648,7 @@ Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
t->Rank(), /* Number of dimensions */ t->Rank(), /* Number of dimensions */
t->shape().AsVector(), /* Buffer dimensions */ t->shape().AsVector(), /* Buffer dimensions */
t->Strides()); t->Strides());
RETURN_UNEXPECTED_IF_NULL(out);
return Status::OK(); return Status::OK();
} }
#endif #endif
@ -721,6 +748,7 @@ Status Tensor::from_json_convert(nlohmann::json json_data, TensorShape shape, st
template <typename T> template <typename T>
Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const { Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
RETURN_UNEXPECTED_IF_NULL(o);
if (data_ == nullptr) { if (data_ == nullptr) {
RETURN_STATUS_UNEXPECTED("Data is not allocated yet"); RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
} }
@ -794,6 +822,7 @@ Status Tensor::GetDataAsNumpy(py::array *data) {
return Status::OK(); return Status::OK();
} }
Status Tensor::GetDataAsNumpyStrings(py::array *data) { Status Tensor::GetDataAsNumpyStrings(py::array *data) {
RETURN_UNEXPECTED_IF_NULL(data);
auto itr = begin<std::string_view>(); auto itr = begin<std::string_view>();
uint64_t max_value = 0; uint64_t max_value = 0;
for (; itr != end<std::string_view>(); ++itr) { for (; itr != end<std::string_view>(); ++itr) {
@ -807,7 +836,9 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) {
max_value = (max_value == 0 ? 1 : max_value); max_value = (max_value == 0 ? 1 : max_value);
uint64_t total_size = shape_.NumOfElements() * max_value; uint64_t total_size = shape_.NumOfElements() * max_value;
char *tmp_data = reinterpret_cast<char *>(data_allocator_->allocate(total_size)); char *tmp_data = reinterpret_cast<char *>(data_allocator_->allocate(total_size));
if (tmp_data == nullptr) RETURN_STATUS_UNEXPECTED("Cannot create temp array."); if (tmp_data == nullptr) {
RETURN_STATUS_UNEXPECTED("Cannot create temp array.");
}
int ret_code = memset_s(tmp_data, total_size, 0, total_size); int ret_code = memset_s(tmp_data, total_size, 0, total_size);
CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to initialize temp memory"); CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to initialize temp memory");
@ -820,9 +851,10 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) {
} }
} }
auto strides = shape_.Strides(); auto strides = shape_.Strides();
std::transform(strides.begin(), strides.end(), strides.begin(), (void)std::transform(strides.begin(), strides.end(), strides.begin(),
[&max_value](const auto &s) { return s * max_value; }); [&max_value](const auto &s) { return s * max_value; });
*data = py::array(py::dtype("S" + std::to_string(max_value)), shape_.AsVector(), strides, tmp_data); *data = py::array(py::dtype("S" + std::to_string(max_value)), shape_.AsVector(), strides, tmp_data);
RETURN_UNEXPECTED_IF_NULL(data);
data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data)); data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data));
return Status::OK(); return Status::OK();
} }
@ -832,6 +864,7 @@ void Tensor::Squeeze() { shape_ = shape_.Squeeze(); }
template <typename T> template <typename T>
Status Tensor::GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const { Status Tensor::GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const {
RETURN_UNEXPECTED_IF_NULL(o);
if (data_ == nullptr) { if (data_ == nullptr) {
RETURN_STATUS_UNEXPECTED("Data is not allocated yet"); RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
} }
@ -873,6 +906,7 @@ Status Tensor::GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const {
template <typename T> template <typename T>
Status Tensor::GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const { Status Tensor::GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const {
RETURN_UNEXPECTED_IF_NULL(o);
if (data_ == nullptr) { if (data_ == nullptr) {
RETURN_STATUS_UNEXPECTED("Data is not allocated yet"); RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
} }
@ -914,6 +948,7 @@ Status Tensor::GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const {
template <typename T> template <typename T>
Status Tensor::GetFloatAt(T *o, const std::vector<dsize_t> &index) const { Status Tensor::GetFloatAt(T *o, const std::vector<dsize_t> &index) const {
RETURN_UNEXPECTED_IF_NULL(o);
if (data_ == nullptr) { if (data_ == nullptr) {
RETURN_STATUS_UNEXPECTED("Data is not allocated yet"); RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
} }
@ -958,6 +993,7 @@ Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length
return Status::OK(); return Status::OK();
} }
Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index) { Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index) {
RETURN_UNEXPECTED_IF_NULL(src);
CHECK_FAIL_RETURN_UNEXPECTED(src->type() == type_, "Source Tensor has a different type"); CHECK_FAIL_RETURN_UNEXPECTED(src->type() == type_, "Source Tensor has a different type");
CHECK_FAIL_RETURN_UNEXPECTED(index.back() == 0, "Last dim in index should be 0"); CHECK_FAIL_RETURN_UNEXPECTED(index.back() == 0, "Last dim in index should be 0");
@ -975,6 +1011,7 @@ Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vect
Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &slice_index, Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &slice_index,
SliceOption *slice_option_ptr) { SliceOption *slice_option_ptr) {
RETURN_UNEXPECTED_IF_NULL(slice_option_ptr);
if (slice_option.indices_.empty() && !slice_option.slice_.valid()) { if (slice_option.indices_.empty() && !slice_option.slice_.valid()) {
RETURN_STATUS_UNEXPECTED("Both indices and slices can not be empty."); RETURN_STATUS_UNEXPECTED("Both indices and slices can not be empty.");
} }
@ -983,6 +1020,7 @@ Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &sl
RETURN_STATUS_UNEXPECTED("Both indices and slices can not be given."); RETURN_STATUS_UNEXPECTED("Both indices and slices can not be given.");
} }
CHECK_FAIL_RETURN_UNEXPECTED(shape_.Size() > slice_index, "Invalid shape, should greater than slices index.");
// if slice object was provided, indices should be empty. Generate indices from the slice object. // if slice object was provided, indices should be empty. Generate indices from the slice object.
if (slice_option.indices_.empty()) { if (slice_option.indices_.empty()) {
// check if slice is valid // check if slice is valid
@ -1010,6 +1048,7 @@ Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &sl
} }
Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption> slice_options_) { Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption> slice_options_) {
RETURN_UNEXPECTED_IF_NULL(out);
std::vector<SliceOption> converted_slice_objects; std::vector<SliceOption> converted_slice_objects;
CHECK_FAIL_RETURN_UNEXPECTED(slice_options_.size() <= static_cast<size_t>(std::numeric_limits<dsize_t>::max()), CHECK_FAIL_RETURN_UNEXPECTED(slice_options_.size() <= static_cast<size_t>(std::numeric_limits<dsize_t>::max()),
@ -1046,7 +1085,7 @@ Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption
for (int i = 0; i < shape_.Rank(); i++) { for (int i = 0; i < shape_.Rank(); i++) {
if (i < slice_len) { if (i < slice_len) {
// if it's a slice // if it's a slice
if (converted_slice_objects[i].indices_.size() == 0) { if (converted_slice_objects[i].indices_.size() == 0 && converted_slice_objects[i].slice_.step_ != 0) {
slice_len_ind = (converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) / slice_len_ind = (converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) /
converted_slice_objects[i].slice_.step_; converted_slice_objects[i].slice_.step_;
if ((converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) % if ((converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) %
@ -1085,8 +1124,10 @@ Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption
Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices, Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
const TensorShape &shape) { const TensorShape &shape) {
RETURN_UNEXPECTED_IF_NULL(out);
RETURN_IF_NOT_OK(CreateEmpty(shape, type_, out)); RETURN_IF_NOT_OK(CreateEmpty(shape, type_, out));
RETURN_UNEXPECTED_IF_NULL(out);
(*out)->GetMutableBuffer(); (*out)->GetMutableBuffer();
dsize_t out_index = 0; dsize_t out_index = 0;
std::vector<dsize_t> dim_length = shape_.AsVector(); std::vector<dsize_t> dim_length = shape_.AsVector();
@ -1131,6 +1172,7 @@ Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std:
} }
Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices, Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
const TensorShape &shape) { const TensorShape &shape) {
RETURN_UNEXPECTED_IF_NULL(out);
std::vector<dsize_t> dim_length = shape_.AsVector(); std::vector<dsize_t> dim_length = shape_.AsVector();
std::vector<std::string> strings; std::vector<std::string> strings;

View File

@ -414,6 +414,10 @@ class Tensor {
/// \param[in] index_vector vector of indices /// \param[in] index_vector vector of indices
/// \return std::vector<dsize_t> modified vector of indices /// \return std::vector<dsize_t> modified vector of indices
static inline std::vector<dsize_t> HandleNegIndices(std::vector<dsize_t> index_vector, std::vector<dsize_t> length) { static inline std::vector<dsize_t> HandleNegIndices(std::vector<dsize_t> index_vector, std::vector<dsize_t> length) {
if (length.size() < index_vector.size()) {
MS_LOG(ERROR) << "The size of length should be greater than the shape of index_vector";
return {};
}
std::vector<dsize_t> indices(index_vector.size(), 0); std::vector<dsize_t> indices(index_vector.size(), 0);
for (int i = 0; i < index_vector.size(); i++) { for (int i = 0; i < index_vector.size(); i++) {
indices[i] = HandleNeg(index_vector[i], length[i]); indices[i] = HandleNeg(index_vector[i], length[i]);
@ -780,12 +784,14 @@ inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>()
template <> template <>
inline Status Tensor::CreateFromVector<std::string>(const std::vector<std::string> &items, const TensorShape &shape, inline Status Tensor::CreateFromVector<std::string>(const std::vector<std::string> &items, const TensorShape &shape,
TensorPtr *out) { TensorPtr *out) {
RETURN_UNEXPECTED_IF_NULL(out);
CHECK_FAIL_RETURN_UNEXPECTED( CHECK_FAIL_RETURN_UNEXPECTED(
items.size() == shape.NumOfElements(), items.size() == shape.NumOfElements(),
"Number of elements in the vector does not match the number of elements of the shape required"); "Number of elements in the vector does not match the number of elements of the shape required");
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(items.size())}), *out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(items.size())}),
DataType(DataType::DE_STRING)); DataType(DataType::DE_STRING));
CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
if (items.size() == 0) { if (items.size() == 0) {
if (shape.known()) { if (shape.known()) {
return (*out)->Reshape(shape); return (*out)->Reshape(shape);
@ -835,6 +841,7 @@ inline Status Tensor::CreateFromVector<std::string>(const std::vector<std::strin
/// \return Status code /// \return Status code
template <>
inline Status Tensor::CreateScalar<std::string>(const std::string &item, TensorPtr *out) {
  // The output slot must be a valid pointer before anything is delegated.
  RETURN_UNEXPECTED_IF_NULL(out);
  // A scalar string tensor is built as a one-element vector combined with a scalar shape.
  const std::vector<std::string> single_item{item};
  const TensorShape scalar_shape = TensorShape::CreateScalar();
  return CreateFromVector<std::string>(single_item, scalar_shape, out);
}
} // namespace dataset } // namespace dataset

View File

@ -16,6 +16,8 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include "minddata/dataset/core/tensor_helpers.h" #include "minddata/dataset/core/tensor_helpers.h"
#include "minddata/dataset/util/log_adapter.h"
#include "minddata/dataset/util/status.h"
namespace mindspore { namespace mindspore {
namespace dataset { namespace dataset {
@ -23,6 +25,10 @@ namespace dataset {
void IndexGeneratorHelper(int8_t depth, std::vector<dsize_t> *numbers, void IndexGeneratorHelper(int8_t depth, std::vector<dsize_t> *numbers,
const std::vector<mindspore::dataset::SliceOption> &slice_list, const std::vector<mindspore::dataset::SliceOption> &slice_list,
std::vector<std::vector<dsize_t>> *matrix) { std::vector<std::vector<dsize_t>> *matrix) {
if (numbers == nullptr || matrix == nullptr) {
MS_LOG(ERROR) << "Invalid input pointer, can't be NULL";
return;
}
// for loop changes if its an index instead of a slice object // for loop changes if its an index instead of a slice object
if (depth > 0) { if (depth > 0) {
int8_t new_depth = depth - 1; int8_t new_depth = depth - 1;

View File

@ -87,6 +87,7 @@ class TensorRow {
/// \param[out] output TensorRow /// \param[out] output TensorRow
template <typename T> template <typename T>
static Status ConvertToTensorRow(const std::vector<T> &o, TensorRow *output) { static Status ConvertToTensorRow(const std::vector<T> &o, TensorRow *output) {
RETURN_UNEXPECTED_IF_NULL(output);
DataType data_type = DataType::FromCType<T>(); DataType data_type = DataType::FromCType<T>();
if (data_type == DataType::DE_UNKNOWN) { if (data_type == DataType::DE_UNKNOWN) {
RETURN_STATUS_UNEXPECTED("ConvertToTensorRow: Data type was not recognized."); RETURN_STATUS_UNEXPECTED("ConvertToTensorRow: Data type was not recognized.");
@ -106,6 +107,7 @@ class TensorRow {
/// \param[out] output TensorRow /// \param[out] output TensorRow
template <typename T> template <typename T>
static Status ConvertToTensorRow(const T &o, TensorRow *output) { static Status ConvertToTensorRow(const T &o, TensorRow *output) {
RETURN_UNEXPECTED_IF_NULL(output);
DataType data_type = DataType::FromCType<T>(); DataType data_type = DataType::FromCType<T>();
if (data_type == DataType::DE_UNKNOWN) { if (data_type == DataType::DE_UNKNOWN) {
RETURN_STATUS_UNEXPECTED("ConvertToTensorRow: Data type was not recognized."); RETURN_STATUS_UNEXPECTED("ConvertToTensorRow: Data type was not recognized.");
@ -125,6 +127,7 @@ class TensorRow {
/// \param[out] o the primitive variable /// \param[out] o the primitive variable
template <typename T> template <typename T>
static Status ConvertFromTensorRow(const TensorRow &input, T *o) { static Status ConvertFromTensorRow(const TensorRow &input, T *o) {
RETURN_UNEXPECTED_IF_NULL(o);
DataType data_type = DataType::FromCType<T>(); DataType data_type = DataType::FromCType<T>();
RETURN_IF_NOT_OK(ValidateTensorRow(input, data_type)); RETURN_IF_NOT_OK(ValidateTensorRow(input, data_type));
if (input.at(0)->type() != data_type) { if (input.at(0)->type() != data_type) {
@ -142,6 +145,7 @@ class TensorRow {
/// \param[out] o vector of primitive variable /// \param[out] o vector of primitive variable
template <typename T> template <typename T>
static Status ConvertFromTensorRow(const TensorRow &input, std::vector<T> *o) { static Status ConvertFromTensorRow(const TensorRow &input, std::vector<T> *o) {
RETURN_UNEXPECTED_IF_NULL(o);
DataType data_type = DataType::FromCType<T>(); DataType data_type = DataType::FromCType<T>();
RETURN_IF_NOT_OK(ValidateTensorRow(input, data_type)); RETURN_IF_NOT_OK(ValidateTensorRow(input, data_type));
if (input.at(0)->Rank() != 1) if (input.at(0)->Rank() != 1)

View File

@ -40,7 +40,7 @@ bool multi_ok(dsize_t x, dsize_t y) {
} }
// Returns the number of elements described by this shape, taken from strides_[0].
// Returns 0 when the shape is not known, or when no strides have been computed; in either case
// strides_[0] would be meaningless or an out-of-bounds read.
dsize_t TensorShape::NumOfElements() const {
  // Both conditions must independently short-circuit: joining them with `&&` would still index an empty
  // strides_ for a known shape, and would stop returning 0 for an unknown shape that happens to have strides.
  if (!known() || strides_.empty()) {
    return 0;
  }
  return strides_[0];
}
@ -216,12 +216,9 @@ py::list TensorShape::AsPyList() {
#endif #endif
// Builds a new TensorShape from raw_shape_ with every dimension equal to 1 dropped; the relative order of
// the remaining dimensions is preserved.
TensorShape TensorShape::Squeeze() const {
  // Over-allocate to the full rank, copy everything that is not 1, then trim the unused tail.
  std::vector<dsize_t> kept_dims(raw_shape_.size());
  const auto kept_end = std::remove_copy(raw_shape_.begin(), raw_shape_.end(), kept_dims.begin(), dsize_t{1});
  kept_dims.erase(kept_end, kept_dims.end());
  return TensorShape(kept_dims);
}
@ -230,6 +227,7 @@ std::vector<dsize_t> TensorShape::Strides() const { return std::vector<dsize_t>{
// Name: ToFlatIndex() // Name: ToFlatIndex()
// Description: convert a vector style index to number, used to access memory internal use only // Description: convert a vector style index to number, used to access memory internal use only
Status TensorShape::ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const { Status TensorShape::ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const {
RETURN_UNEXPECTED_IF_NULL(flat_index);
if (index.size() != raw_shape_.size()) { if (index.size() != raw_shape_.size()) {
std::stringstream ss; std::stringstream ss;
ss << "Index size (" << index.size() << ") does not match the shape size (" << raw_shape_.size() << ")."; ss << "Index size (" << index.size() << ") does not match the shape size (" << raw_shape_.size() << ").";

View File

@ -101,8 +101,8 @@ Status CacheServerHW::GetNumaNodeInfo() {
}; };
// Look for name starts with 'node' and followed by digits. // Look for name starts with 'node' and followed by digits.
const char kNodeName[] = "node"; const char kNodeName[] = "node";
while (it->hasNext()) { while (it->HasNext()) {
auto p = it->next(); auto p = it->Next();
const std::string entry = p.Basename(); const std::string entry = p.Basename();
const char *name = entry.data(); const char *name = entry.data();
if (strncmp(name, kNodeName, strlen(kNodeName)) == 0 && isdigit_string(name + strlen(kNodeName))) { if (strncmp(name, kNodeName, strlen(kNodeName)) == 0 && isdigit_string(name + strlen(kNodeName))) {

View File

@ -63,8 +63,8 @@ Status CachePool::DoServiceStop() {
if (!root_.toString().empty()) { if (!root_.toString().empty()) {
Path spill = GetSpillPath(); Path spill = GetSpillPath();
auto it = Path::DirIterator::OpenDirectory(&spill); auto it = Path::DirIterator::OpenDirectory(&spill);
while (it->hasNext()) { while (it->HasNext()) {
rc = it->next().Remove(); rc = it->Next().Remove();
if (rc.IsError() && rc2.IsOk()) { if (rc.IsError() && rc2.IsOk()) {
rc2 = rc; rc2 = rc;
} }

View File

@ -24,6 +24,7 @@ namespace mindspore::dataset {
// Constructs the consumer with a freshly created TreeAdapterLite stored in tree_adapter_lite_.
PullBasedIteratorConsumer::PullBasedIteratorConsumer() {
  auto adapter = std::make_unique<TreeAdapterLite>();
  tree_adapter_lite_ = std::move(adapter);
}
// Initializes the consumer by handing the IR tree rooted at `root` to the lite tree adapter.
// A null `root` is rejected by RETURN_UNEXPECTED_IF_NULL before the adapter is touched; otherwise the
// Status produced by BuildTree is returned unchanged.
Status PullBasedIteratorConsumer::Init(std::shared_ptr<DatasetNode> root) {
  RETURN_UNEXPECTED_IF_NULL(root);
  // Ownership of the by-value shared_ptr is moved into BuildTree.
  Status build_status = tree_adapter_lite_->BuildTree(std::move(root));
  return build_status;
}

View File

@ -20,6 +20,7 @@
namespace mindspore::dataset { namespace mindspore::dataset {
Status PythonIteratorConsumer::GetNextAsList(py::list *out) { Status PythonIteratorConsumer::GetNextAsList(py::list *out) {
RETURN_UNEXPECTED_IF_NULL(out);
std::vector<TensorPtr> row; std::vector<TensorPtr> row;
{ {
py::gil_scoped_release gil_release; py::gil_scoped_release gil_release;
@ -32,6 +33,7 @@ Status PythonIteratorConsumer::GetNextAsList(py::list *out) {
} }
Status PythonIteratorConsumer::GetNextAsDict(py::dict *out) { Status PythonIteratorConsumer::GetNextAsDict(py::dict *out) {
RETURN_UNEXPECTED_IF_NULL(out);
std::vector<std::pair<std::string, std::shared_ptr<Tensor>>> vec; std::vector<std::pair<std::string, std::shared_ptr<Tensor>>> vec;
Status s; Status s;
{ {
@ -64,6 +66,8 @@ Status PythonTreeGetters::GetRow(TensorRow *const r) {
return TreeGetters::GetRow(r); return TreeGetters::GetRow(r);
} }
// Python-facing wrapper around DatasetSizeGetter::GetRow.
// Both pointers are checked with RETURN_UNEXPECTED_IF_NULL first; the base-class call then runs while a
// py::gil_scoped_release object is alive, and its Status is returned unchanged.
Status PythonDatasetSizeGetter::GetRow(const std::shared_ptr<TreeAdapter> &tree_adapter, TensorRow *r) {
  RETURN_UNEXPECTED_IF_NULL(tree_adapter);
  RETURN_UNEXPECTED_IF_NULL(r);
  // Keep the release guard declared before the call so it stays in scope for the whole fetch.
  py::gil_scoped_release gil_release;
  Status fetch_status = DatasetSizeGetter::GetRow(tree_adapter, r);
  return fetch_status;
}

View File

@ -13,7 +13,6 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
#include <algorithm> #include <algorithm>
#include <memory> #include <memory>
#include <string> #include <string>
@ -179,6 +178,8 @@ Status ToDevice::Stop() {
} }
Status ToDevice::GetDataInfo(std::vector<DataType> *const types, std::vector<TensorShape> *const shapes) { Status ToDevice::GetDataInfo(std::vector<DataType> *const types, std::vector<TensorShape> *const shapes) {
RETURN_UNEXPECTED_IF_NULL(types);
RETURN_UNEXPECTED_IF_NULL(shapes);
// tree_.root() must be DeviceQueueOp // tree_.root() must be DeviceQueueOp
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot()); std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
CHECK_FAIL_RETURN_UNEXPECTED(root != nullptr, "Root is a nullptr."); CHECK_FAIL_RETURN_UNEXPECTED(root != nullptr, "Root is a nullptr.");
@ -218,8 +219,13 @@ Status SaveToDisk::ValidateParams() {
MS_LOG(ERROR) << err; MS_LOG(ERROR) << err;
RETURN_STATUS_SYNTAX_ERROR(err); RETURN_STATUS_SYNTAX_ERROR(err);
} }
auto parent_path = dir.ParentPath(); std::string real_path;
if (!parent_path.empty() && access(common::SafeCStr(parent_path), R_OK) == -1) { if (Path::RealPath(dir.ParentPath(), real_path).IsError()) {
std::string err_msg = "CreateSaver failed, can not get real dataset path: " + dir.ParentPath();
MS_LOG(ERROR) << err_msg;
RETURN_STATUS_SYNTAX_ERROR(err_msg);
}
if (access(dir.ParentPath().c_str(), R_OK) == -1) {
std::string err_msg = "CreateSaver failed, no access to specified dataset path: " + dataset_path_; std::string err_msg = "CreateSaver failed, no access to specified dataset path: " + dataset_path_;
MS_LOG(ERROR) << err_msg; MS_LOG(ERROR) << err_msg;
RETURN_STATUS_SYNTAX_ERROR(err_msg); RETURN_STATUS_SYNTAX_ERROR(err_msg);
@ -250,15 +256,15 @@ Status SaveToDisk::Save() {
auto mr_header = std::make_shared<mindrecord::ShardHeader>(); auto mr_header = std::make_shared<mindrecord::ShardHeader>();
auto mr_writer = std::make_unique<mindrecord::ShardWriter>(); auto mr_writer = std::make_unique<mindrecord::ShardWriter>();
std::vector<std::string> blob_fields; std::vector<std::string> blob_fields;
if (mindrecord::SUCCESS != mindrecord::ShardWriter::initialize(&mr_writer, file_names)) { if (mindrecord::SUCCESS != mindrecord::ShardWriter::Initialize(&mr_writer, file_names)) {
RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter, please check above `ERROR` level message."); RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter, please check above `ERROR` level message.");
} }
std::unordered_map<std::string, int32_t> column_name_id_map; std::unordered_map<std::string, int32_t> column_name_id_map;
for (auto el : tree_adapter_->GetColumnNameMap()) { for (auto el : tree_adapter_->GetColumnNameMap()) {
std::string column_name = el.first; std::string column_name = el.first;
std::transform(column_name.begin(), column_name.end(), column_name.begin(), (void)std::transform(column_name.begin(), column_name.end(), column_name.begin(),
[](unsigned char c) { return ispunct(c) ? '_' : c; }); [](unsigned char c) { return ispunct(c) ? '_' : c; });
column_name_id_map[column_name] = el.second; column_name_id_map[column_name] = el.second;
} }
@ -281,17 +287,21 @@ Status SaveToDisk::Save() {
RETURN_IF_NOT_OK(FetchMetaFromTensorRow(column_name_id_map, row, &mr_json, &index_fields)); RETURN_IF_NOT_OK(FetchMetaFromTensorRow(column_name_id_map, row, &mr_json, &index_fields));
MS_LOG(INFO) << "Schema of saved mindrecord: " << mr_json.dump(); MS_LOG(INFO) << "Schema of saved mindrecord: " << mr_json.dump();
if (mindrecord::SUCCESS != if (mindrecord::SUCCESS !=
mindrecord::ShardHeader::initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) { mindrecord::ShardHeader::Initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) {
RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardHeader."); RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardHeader.");
} }
mr_writer->SetShardHeader(mr_header); if (mindrecord::SUCCESS != mr_writer->SetShardHeader(mr_header)) {
RETURN_STATUS_UNEXPECTED("Error: failed to set header of ShardWriter.");
}
first_loop = false; first_loop = false;
} }
// construct data // construct data
if (!row.empty()) { // write data if (!row.empty()) { // write data
RETURN_IF_NOT_OK(FetchDataFromTensorRow(row, column_name_id_map, &row_raw_data, &row_bin_data)); RETURN_IF_NOT_OK(FetchDataFromTensorRow(row, column_name_id_map, &row_raw_data, &row_bin_data));
std::shared_ptr<std::vector<uint8_t>> output_bin_data; std::shared_ptr<std::vector<uint8_t>> output_bin_data;
mr_writer->MergeBlobData(blob_fields, row_bin_data, &output_bin_data); if (mindrecord::SUCCESS != mr_writer->MergeBlobData(blob_fields, row_bin_data, &output_bin_data)) {
RETURN_STATUS_UNEXPECTED("Error: failed to merge blob data of ShardWriter.");
}
std::map<std::uint64_t, std::vector<nlohmann::json>> raw_data; std::map<std::uint64_t, std::vector<nlohmann::json>> raw_data;
raw_data.insert( raw_data.insert(
std::pair<uint64_t, std::vector<nlohmann::json>>(mr_schema_id, std::vector<nlohmann::json>{row_raw_data})); std::pair<uint64_t, std::vector<nlohmann::json>>(mr_schema_id, std::vector<nlohmann::json>{row_raw_data}));
@ -299,12 +309,16 @@ Status SaveToDisk::Save() {
if (output_bin_data != nullptr) { if (output_bin_data != nullptr) {
bin_data.emplace_back(*output_bin_data); bin_data.emplace_back(*output_bin_data);
} }
mr_writer->WriteRawData(raw_data, bin_data); if (mindrecord::SUCCESS != mr_writer->WriteRawData(raw_data, bin_data)) {
RETURN_STATUS_UNEXPECTED("Error: failed to write raw data to ShardWriter.");
}
} }
} while (!row.empty()); } while (!row.empty());
mr_writer->Commit(); if (mindrecord::SUCCESS != mr_writer->Commit()) {
if (mindrecord::SUCCESS != mindrecord::ShardIndexGenerator::finalize(file_names)) { RETURN_STATUS_UNEXPECTED("Error: failed to commit ShardWriter.");
}
if (mindrecord::SUCCESS != mindrecord::ShardIndexGenerator::Finalize(file_names)) {
RETURN_STATUS_UNEXPECTED("Error: failed to finalize ShardIndexGenerator."); RETURN_STATUS_UNEXPECTED("Error: failed to finalize ShardIndexGenerator.");
} }
return Status::OK(); return Status::OK();
@ -407,7 +421,7 @@ Status SaveToDisk::FetchMetaFromTensorRow(const std::unordered_map<std::string,
return Status::OK(); return Status::OK();
} }
static Status ValidateInputParams(nlohmann::json *row_raw_data, inline Status ValidateInputParams(nlohmann::json *row_raw_data,
std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data, std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data,
const std::unordered_map<std::string, int32_t> &column_name_id_map) { const std::unordered_map<std::string, int32_t> &column_name_id_map) {
if (row_raw_data == nullptr) { if (row_raw_data == nullptr) {
@ -424,6 +438,8 @@ static Status ValidateInputParams(nlohmann::json *row_raw_data,
Status SaveToDisk::FetchFloatData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data, Status SaveToDisk::FetchFloatData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
std::unique_ptr<std::vector<uint8_t>> *data_ptr) { std::unique_ptr<std::vector<uint8_t>> *data_ptr) {
RETURN_UNEXPECTED_IF_NULL(row_raw_data);
RETURN_UNEXPECTED_IF_NULL(data_ptr);
auto column_type = tensor->type(); auto column_type = tensor->type();
Status s; Status s;
if (column_type == DataType::DE_FLOAT32) { if (column_type == DataType::DE_FLOAT32) {
@ -442,6 +458,9 @@ Status SaveToDisk::FetchFloatData(std::shared_ptr<Tensor> tensor, std::string co
Status SaveToDisk::FetchItemData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data, Status SaveToDisk::FetchItemData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) { std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) {
RETURN_UNEXPECTED_IF_NULL(tensor);
RETURN_UNEXPECTED_IF_NULL(row_raw_data);
RETURN_UNEXPECTED_IF_NULL(row_bin_data);
auto column_type = tensor->type(); auto column_type = tensor->type();
Status s; Status s;
std::unique_ptr<std::vector<uint8_t>> data_ptr; std::unique_ptr<std::vector<uint8_t>> data_ptr;
@ -492,7 +511,6 @@ Status SaveToDisk::FetchItemData(std::shared_ptr<Tensor> tensor, std::string col
RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {})); // assume scalar string tensor RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {})); // assume scalar string tensor
std::string ss(sv); std::string ss(sv);
(*row_raw_data)[column_name] = std::move(ss); (*row_raw_data)[column_name] = std::move(ss);
return Status::OK();
} else { } else {
RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data."); RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data.");
} }
@ -506,6 +524,8 @@ Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row,
const std::unordered_map<std::string, int32_t> &column_name_id_map, const std::unordered_map<std::string, int32_t> &column_name_id_map,
nlohmann::json *row_raw_data, nlohmann::json *row_raw_data,
std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) { std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) {
RETURN_UNEXPECTED_IF_NULL(row_raw_data);
RETURN_UNEXPECTED_IF_NULL(row_bin_data);
Status s; Status s;
s = ValidateInputParams(row_raw_data, row_bin_data, column_name_id_map); s = ValidateInputParams(row_raw_data, row_bin_data, column_name_id_map);
if (s.IsError()) { if (s.IsError()) {
@ -525,9 +545,11 @@ template <typename T, typename S>
Status SaveToDisk::TransformTensor(const unsigned char *src, const TensorShape &shape, const int64_t num_of_elements, Status SaveToDisk::TransformTensor(const unsigned char *src, const TensorShape &shape, const int64_t num_of_elements,
std::unique_ptr<T> *data, std::unique_ptr<std::vector<uint8_t>> *data_ptr, std::unique_ptr<T> *data, std::unique_ptr<std::vector<uint8_t>> *data_ptr,
std::unique_ptr<S> *s, bool need_convert) { std::unique_ptr<S> *s, bool need_convert) {
if (nullptr == src) { RETURN_UNEXPECTED_IF_NULL(src);
RETURN_STATUS_UNEXPECTED("Error: buffer of Tensor is NULL."); RETURN_UNEXPECTED_IF_NULL(data);
} RETURN_UNEXPECTED_IF_NULL(data_ptr);
RETURN_UNEXPECTED_IF_NULL(s);
*data_ptr = std::make_unique<std::vector<uint8_t>>(num_of_elements * sizeof(T)); *data_ptr = std::make_unique<std::vector<uint8_t>>(num_of_elements * sizeof(T));
if (need_convert) { if (need_convert) {
auto tmp_ptr = std::make_unique<std::vector<uint8_t>>(num_of_elements * sizeof(S)); auto tmp_ptr = std::make_unique<std::vector<uint8_t>>(num_of_elements * sizeof(S));
@ -560,25 +582,32 @@ TreeGetters::TreeGetters() : dataset_size_(-1), init_flag_(false), first_row_obt
} }
Status TreeGetters::Init(std::shared_ptr<DatasetNode> d) { Status TreeGetters::Init(std::shared_ptr<DatasetNode> d) {
RETURN_UNEXPECTED_IF_NULL(d);
root_ = std::move(d); root_ = std::move(d);
return Status::OK(); return Status::OK();
} }
Status TreeGetters::GetRow(TensorRow *row) { return tree_adapter_->GetNext(row); } Status TreeGetters::GetRow(TensorRow *row) {
RETURN_UNEXPECTED_IF_NULL(row);
return tree_adapter_->GetNext(row);
}
Status TreeGetters::GetOutputTypes(std::vector<DataType> *types) { Status TreeGetters::GetOutputTypes(std::vector<DataType> *types) {
RETURN_UNEXPECTED_IF_NULL(types);
RETURN_IF_NOT_OK(GetFirstRowShapeAndType()); RETURN_IF_NOT_OK(GetFirstRowShapeAndType());
*types = first_row_type_; *types = first_row_type_;
return Status::OK(); return Status::OK();
} }
Status TreeGetters::GetOutputShapes(std::vector<TensorShape> *shapes) { Status TreeGetters::GetOutputShapes(std::vector<TensorShape> *shapes) {
RETURN_UNEXPECTED_IF_NULL(shapes);
RETURN_IF_NOT_OK(GetFirstRowShapeAndType()); RETURN_IF_NOT_OK(GetFirstRowShapeAndType());
*shapes = first_row_shape_; *shapes = first_row_shape_;
return Status::OK(); return Status::OK();
} }
Status TreeGetters::GetBatchSize(int64_t *batch_size) { Status TreeGetters::GetBatchSize(int64_t *batch_size) {
RETURN_UNEXPECTED_IF_NULL(batch_size);
RETURN_IF_NOT_OK(InternalInit()); RETURN_IF_NOT_OK(InternalInit());
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot()); std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
RETURN_UNEXPECTED_IF_NULL(root); RETURN_UNEXPECTED_IF_NULL(root);
@ -588,6 +617,7 @@ Status TreeGetters::GetBatchSize(int64_t *batch_size) {
} }
Status TreeGetters::GetRepeatCount(int64_t *repeat_count) { Status TreeGetters::GetRepeatCount(int64_t *repeat_count) {
RETURN_UNEXPECTED_IF_NULL(repeat_count);
RETURN_IF_NOT_OK(InternalInit()); RETURN_IF_NOT_OK(InternalInit());
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot()); std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
RETURN_UNEXPECTED_IF_NULL(root); RETURN_UNEXPECTED_IF_NULL(root);
@ -596,6 +626,7 @@ Status TreeGetters::GetRepeatCount(int64_t *repeat_count) {
} }
Status TreeGetters::GetNumClasses(int64_t *num_classes) { Status TreeGetters::GetNumClasses(int64_t *num_classes) {
RETURN_UNEXPECTED_IF_NULL(num_classes);
RETURN_IF_NOT_OK(InternalInit()); RETURN_IF_NOT_OK(InternalInit());
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot()); std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
RETURN_UNEXPECTED_IF_NULL(root); RETURN_UNEXPECTED_IF_NULL(root);
@ -604,6 +635,7 @@ Status TreeGetters::GetNumClasses(int64_t *num_classes) {
} }
Status TreeGetters::GetColumnNames(std::vector<std::string> *output) { Status TreeGetters::GetColumnNames(std::vector<std::string> *output) {
RETURN_UNEXPECTED_IF_NULL(output);
RETURN_IF_NOT_OK(InternalInit()); RETURN_IF_NOT_OK(InternalInit());
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot()); std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
RETURN_UNEXPECTED_IF_NULL(root); RETURN_UNEXPECTED_IF_NULL(root);
@ -620,6 +652,7 @@ Status TreeGetters::GetColumnNames(std::vector<std::string> *output) {
} }
Status TreeGetters::GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) { Status TreeGetters::GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) {
RETURN_UNEXPECTED_IF_NULL(output_class_indexing);
RETURN_IF_NOT_OK(InternalInit()); RETURN_IF_NOT_OK(InternalInit());
std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot()); std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
RETURN_UNEXPECTED_IF_NULL(root); RETURN_UNEXPECTED_IF_NULL(root);
@ -671,6 +704,7 @@ Status DatasetSizeGetter::Init(std::shared_ptr<DatasetNode> d) {
return Status::OK(); return Status::OK();
} }
Status DatasetSizeGetter::DryRun(std::shared_ptr<DatasetNode> ir_node, int64_t *dataset_size) { Status DatasetSizeGetter::DryRun(std::shared_ptr<DatasetNode> ir_node, int64_t *dataset_size) {
RETURN_UNEXPECTED_IF_NULL(dataset_size);
std::shared_ptr<TreeAdapter> tree_adapter = std::make_shared<TreeAdapter>(TreeAdapter::UsageFlag::kDeGetter); std::shared_ptr<TreeAdapter> tree_adapter = std::make_shared<TreeAdapter>(TreeAdapter::UsageFlag::kDeGetter);
tree_adapters_.push_back(tree_adapter); tree_adapters_.push_back(tree_adapter);
RETURN_IF_NOT_OK(tree_adapter->Compile(ir_node, 1)); RETURN_IF_NOT_OK(tree_adapter->Compile(ir_node, 1));
@ -685,6 +719,7 @@ Status DatasetSizeGetter::DryRun(std::shared_ptr<DatasetNode> ir_node, int64_t *
return Status::OK(); return Status::OK();
} }
Status DatasetSizeGetter::GetRow(const std::shared_ptr<TreeAdapter> &tree_adapter, TensorRow *row) { Status DatasetSizeGetter::GetRow(const std::shared_ptr<TreeAdapter> &tree_adapter, TensorRow *row) {
RETURN_UNEXPECTED_IF_NULL(row);
return tree_adapter->GetNext(row); return tree_adapter->GetNext(row);
} }
Status DatasetSizeGetter::Terminate() { Status DatasetSizeGetter::Terminate() {

View File

@ -73,7 +73,7 @@ ColDescriptor::ColDescriptor(const std::string &col_name, DataType col_type, Ten
ColDescriptor::ColDescriptor(const ColDescriptor &in_cd) ColDescriptor::ColDescriptor(const ColDescriptor &in_cd)
: type_(in_cd.type_), rank_(in_cd.rank_), tensor_impl_(in_cd.tensor_impl_), col_name_(in_cd.col_name_) { : type_(in_cd.type_), rank_(in_cd.rank_), tensor_impl_(in_cd.tensor_impl_), col_name_(in_cd.col_name_) {
// If it has a tensor shape, make a copy of it with our own unique_ptr. // If it has a tensor shape, make a copy of it with our own unique_ptr.
tensor_shape_ = in_cd.hasShape() ? std::make_unique<TensorShape>(in_cd.shape()) : nullptr; tensor_shape_ = in_cd.HasShape() ? std::make_unique<TensorShape>(in_cd.Shape()) : nullptr;
} }
// Assignment overload // Assignment overload
@ -84,7 +84,7 @@ ColDescriptor &ColDescriptor::operator=(const ColDescriptor &in_cd) {
tensor_impl_ = in_cd.tensor_impl_; tensor_impl_ = in_cd.tensor_impl_;
col_name_ = in_cd.col_name_; col_name_ = in_cd.col_name_;
// If it has a tensor shape, make a copy of it with our own unique_ptr. // If it has a tensor shape, make a copy of it with our own unique_ptr.
tensor_shape_ = in_cd.hasShape() ? std::make_unique<TensorShape>(in_cd.shape()) : nullptr; tensor_shape_ = in_cd.HasShape() ? std::make_unique<TensorShape>(in_cd.Shape()) : nullptr;
} }
return *this; return *this;
} }
@ -113,7 +113,7 @@ Status ColDescriptor::MaterializeTensorShape(int32_t num_elements, TensorShape *
// If the shape is not given in this column, then we assume the shape will be: {numElements} // If the shape is not given in this column, then we assume the shape will be: {numElements}
if (tensor_shape_ == nullptr) { if (tensor_shape_ == nullptr) {
if (this->rank() == 0 && num_elements == 1) { if (this->Rank() == 0 && num_elements == 1) {
*out_shape = TensorShape::CreateScalar(); *out_shape = TensorShape::CreateScalar();
return Status::OK(); return Status::OK();
} }
@ -173,7 +173,7 @@ Status ColDescriptor::MaterializeTensorShape(int32_t num_elements, TensorShape *
} }
// getter function for the shape // getter function for the shape
TensorShape ColDescriptor::shape() const { TensorShape ColDescriptor::Shape() const {
if (tensor_shape_ != nullptr) { if (tensor_shape_ != nullptr) {
return *tensor_shape_; // copy construct a shape to return return *tensor_shape_; // copy construct a shape to return
} else { } else {
@ -257,7 +257,7 @@ Status DataSchema::ColumnOrderLoad(nlohmann::json column_tree, const std::vector
} }
// Internal helper function for parsing shape info and building a vector for the shape construction. // Internal helper function for parsing shape info and building a vector for the shape construction.
static Status buildShape(const nlohmann::json &shapeVal, std::vector<dsize_t> *outShape) { static Status BuildShape(const nlohmann::json &shapeVal, std::vector<dsize_t> *outShape) {
if (outShape == nullptr) { if (outShape == nullptr) {
RETURN_STATUS_UNEXPECTED("null output shape"); RETURN_STATUS_UNEXPECTED("null output shape");
} }
@ -274,7 +274,8 @@ static Status buildShape(const nlohmann::json &shapeVal, std::vector<dsize_t> *o
Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::string &col_name) { Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::string &col_name) {
int32_t rank_value = -1; int32_t rank_value = -1;
TensorImpl t_impl_value = TensorImpl::kFlexible; TensorImpl t_impl_value = TensorImpl::kFlexible;
std::string name, type_str; std::string name = "";
std::string type_str = "";
std::vector<dsize_t> tmp_shape = {}; std::vector<dsize_t> tmp_shape = {};
bool shape_field_exists = false; bool shape_field_exists = false;
// Iterate over this column's attributes. // Iterate over this column's attributes.
@ -291,7 +292,7 @@ Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::strin
STR_TO_TENSORIMPL(it_child.value(), t_impl_value); STR_TO_TENSORIMPL(it_child.value(), t_impl_value);
} else if (it_child.key() == "shape") { } else if (it_child.key() == "shape") {
shape_field_exists = true; shape_field_exists = true;
RETURN_IF_NOT_OK(buildShape(it_child.value(), &tmp_shape)); RETURN_IF_NOT_OK(BuildShape(it_child.value(), &tmp_shape));
} else { } else {
std::string err_msg = "Unexpected column attribute " + it_child.key() + " for column " + col_name; std::string err_msg = "Unexpected column attribute " + it_child.key() + " for column " + col_name;
RETURN_STATUS_UNEXPECTED(err_msg); RETURN_STATUS_UNEXPECTED(err_msg);
@ -324,10 +325,10 @@ Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::strin
// Create the column descriptor for this column from the data we pulled from the json file // Create the column descriptor for this column from the data we pulled from the json file
TensorShape col_shape = TensorShape(tmp_shape); TensorShape col_shape = TensorShape(tmp_shape);
if (shape_field_exists) if (shape_field_exists)
(void)this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value, &col_shape)); RETURN_IF_NOT_OK(this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value, &col_shape)));
else else
// Create a column descriptor that doesn't have a shape // Create a column descriptor that doesn't have a shape
(void)this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value)); RETURN_IF_NOT_OK(this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value)));
return Status::OK(); return Status::OK();
} }
@ -345,19 +346,30 @@ Status DataSchema::LoadSchemaFile(const std::string &schema_file_path,
} catch (nlohmann::json::out_of_range &e) { } catch (nlohmann::json::out_of_range &e) {
num_rows_ = 0; num_rows_ = 0;
} catch (nlohmann::json::exception &e) { } catch (nlohmann::json::exception &e) {
in.close();
RETURN_STATUS_UNEXPECTED("Unable to parse \"numRows\" from schema"); RETURN_STATUS_UNEXPECTED("Unable to parse \"numRows\" from schema");
} }
nlohmann::json column_tree = js.at("columns"); nlohmann::json column_tree = js.at("columns");
if (column_tree.empty()) { if (column_tree.empty()) {
in.close();
RETURN_STATUS_UNEXPECTED("columns is null"); RETURN_STATUS_UNEXPECTED("columns is null");
} }
if (columns_to_load.empty()) { if (columns_to_load.empty()) {
// Parse the json tree and load the schema's columns in whatever order that the json // Parse the json tree and load the schema's columns in whatever order that the json
// layout decides // layout decides
RETURN_IF_NOT_OK(this->AnyOrderLoad(column_tree)); Status rc = this->AnyOrderLoad(column_tree);
if (rc.IsError()) {
in.close();
return rc;
}
} else { } else {
RETURN_IF_NOT_OK(this->ColumnOrderLoad(column_tree, columns_to_load)); Status rc = this->ColumnOrderLoad(column_tree, columns_to_load);
if (rc.IsError()) {
in.close();
return rc;
}
} }
in.close();
} catch (const std::exception &err) { } catch (const std::exception &err) {
// Catch any exception and convert to Status return code // Catch any exception and convert to Status return code
RETURN_STATUS_UNEXPECTED("Schema file failed to load with JSON tools. File is: " + schema_file_path); RETURN_STATUS_UNEXPECTED("Schema file failed to load with JSON tools. File is: " + schema_file_path);
@ -394,7 +406,7 @@ Status DataSchema::LoadSchemaString(const std::string &schema_json_string,
DataSchema::~DataSchema() = default; DataSchema::~DataSchema() = default;
// Getter for the ColDescriptor by index // Getter for the ColDescriptor by index
const ColDescriptor &DataSchema::column(int32_t idx) const { const ColDescriptor &DataSchema::Column(int32_t idx) const {
MS_ASSERT(idx < static_cast<int>(col_descs_.size())); MS_ASSERT(idx < static_cast<int>(col_descs_.size()));
return col_descs_[idx]; return col_descs_[idx];
} }
@ -411,9 +423,9 @@ void DataSchema::Print(std::ostream &out) const {
Status DataSchema::AddColumn(const ColDescriptor &cd) { Status DataSchema::AddColumn(const ColDescriptor &cd) {
// Sanity check there's not a duplicate name before adding the column // Sanity check there's not a duplicate name before adding the column
for (auto i = 0; i < col_descs_.size(); ++i) { for (auto i = 0; i < col_descs_.size(); ++i) {
if (col_descs_[i].name() == cd.name()) { if (col_descs_[i].Name() == cd.Name()) {
std::ostringstream ss; std::ostringstream ss;
ss << "column name '" << cd.name() << "' already exists in schema."; ss << "column name '" << cd.Name() << "' already exists in schema.";
std::string err_msg = ss.str(); std::string err_msg = ss.str();
RETURN_STATUS_UNEXPECTED(err_msg); RETURN_STATUS_UNEXPECTED(err_msg);
} }
@ -439,11 +451,11 @@ Status DataSchema::GetColumnNameMap(std::unordered_map<std::string, int32_t> *ou
} }
for (size_t i = 0; i < col_descs_.size(); ++i) { for (size_t i = 0; i < col_descs_.size(); ++i) {
if (col_descs_[i].name().empty()) { if (col_descs_[i].Name().empty()) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
"Constructing column name map from schema, but found empty column name."); "Constructing column name map from schema, but found empty column name.");
} }
(*out_column_name_map)[col_descs_[i].name()] = i; (*out_column_name_map)[col_descs_[i].Name()] = i;
} }
return Status::OK(); return Status::OK();

View File

@ -81,27 +81,27 @@ class ColDescriptor {
/// \brief getter function /// \brief getter function
/// \return The column's DataType /// \return The column's DataType
DataType type() const { return type_; } DataType Type() const { return type_; }
/// \brief getter function /// \brief getter function
/// \return The column's rank /// \return The column's rank
int32_t rank() const { return rank_; } int32_t Rank() const { return rank_; }
/// \brief getter function /// \brief getter function
/// \return The column's name /// \return The column's name
std::string name() const { return col_name_; } std::string Name() const { return col_name_; }
/// \brief getter function /// \brief getter function
/// \return The column's shape /// \return The column's shape
TensorShape shape() const; TensorShape Shape() const;
/// \brief getter function /// \brief getter function
/// \return TF if the column has an assigned fixed shape. /// \return TF if the column has an assigned fixed shape.
bool hasShape() const { return tensor_shape_ != nullptr; } bool HasShape() const { return tensor_shape_ != nullptr; }
/// \brief getter function /// \brief getter function
/// \return The column's tensor implementation type /// \return The column's tensor implementation type
TensorImpl tensorImpl() const { return tensor_impl_; } TensorImpl GetTensorImpl() const { return tensor_impl_; }
private: private:
DataType type_; // The columns type DataType type_; // The columns type
@ -153,7 +153,7 @@ class DataSchema {
/// \brief getter /// \brief getter
/// \return The reference to a ColDescriptor to get (const version) /// \return The reference to a ColDescriptor to get (const version)
const ColDescriptor &column(int32_t idx) const; const ColDescriptor &Column(int32_t idx) const;
/// \brief getter /// \brief getter
/// \return The number of columns in the schema /// \return The number of columns in the schema
@ -163,7 +163,7 @@ class DataSchema {
/// \brief getter /// \brief getter
/// \return The number of rows read from schema /// \return The number of rows read from schema
int64_t num_rows() const { return num_rows_; } int64_t NumRows() const { return num_rows_; }
static const char DEFAULT_DATA_SCHEMA_FILENAME[]; static const char DEFAULT_DATA_SCHEMA_FILENAME[];

View File

@ -14,6 +14,7 @@
* limitations under the License. * limitations under the License.
*/ */
#include "minddata/dataset/engine/dataset_iterator.h" #include "minddata/dataset/engine/dataset_iterator.h"
#include <algorithm>
#include <unordered_map> #include <unordered_map>
#include <utility> #include <utility>
#include "minddata/dataset/core/data_type.h" #include "minddata/dataset/core/data_type.h"

View File

@ -49,7 +49,7 @@ class DatasetIterator {
// @return The string to column id mapping. // @return The string to column id mapping.
std::unordered_map<std::string, int32_t> GetColumnNameMap() const; std::unordered_map<std::string, int32_t> GetColumnNameMap() const;
bool eof_handled() const { return eof_handled_; } bool EofHandled() const { return eof_handled_; }
// Fetches one row of data from the iterator. // Fetches one row of data from the iterator.
// the base class version simply performs error handling and returns empty row. Actual // the base class version simply performs error handling and returns empty row. Actual
@ -108,11 +108,11 @@ class ChildIterator {
std::unordered_map<std::string, int32_t> GetColumnNameMap() const; std::unordered_map<std::string, int32_t> GetColumnNameMap() const;
// Return T/F if end of epoch // Return T/F if end of epoch
bool end_of_epoch() { return end_epoch_; } bool EndOfEpoch() { return end_epoch_; }
// Getter // Getter
// @return T/F if this iterator is completely done after getting an eof // @return T/F if this iterator is completely done after getting an eof
bool eof_handled() const { return eof_handled_; } bool EofHandled() const { return eof_handled_; }
private: private:
DatasetOp *current_op_; // The parent operator. We consume from it's children. DatasetOp *current_op_; // The parent operator. We consume from it's children.

View File

@ -113,6 +113,7 @@ Status BarrierOp::blockCond() {
// fetches next Barrier row // fetches next Barrier row
Status BarrierOp::getNextTensorRow(TensorRow *new_row) { Status BarrierOp::getNextTensorRow(TensorRow *new_row) {
RETURN_UNEXPECTED_IF_NULL(new_row);
// iterate over all iterators and generate a row // iterate over all iterators and generate a row
RETURN_IF_NOT_OK((child_iterator_)->FetchNextTensorRow(new_row)); RETURN_IF_NOT_OK((child_iterator_)->FetchNextTensorRow(new_row));
// add each new row to iterator, check if row is empty, if row from iterator is empty return empty row // add each new row to iterator, check if row is empty, if row from iterator is empty return empty row
@ -122,7 +123,7 @@ Status BarrierOp::getNextTensorRow(TensorRow *new_row) {
MS_LOG(INFO) << "Barrier operator child iterator produced empty row."; MS_LOG(INFO) << "Barrier operator child iterator produced empty row.";
clean_up_ = true; clean_up_ = true;
// If we picked up an eof here, then we are completely done. // If we picked up an eof here, then we are completely done.
if ((child_iterator_)->eof_handled()) { if ((child_iterator_)->EofHandled()) {
MS_LOG(INFO) << "Barrier operator iterator got EOF."; MS_LOG(INFO) << "Barrier operator iterator got EOF.";
eof_ = true; eof_ = true;
} }

View File

@ -36,6 +36,7 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa
} }
Status BatchOp::Builder::Build(std::shared_ptr<BatchOp> *ptr) { Status BatchOp::Builder::Build(std::shared_ptr<BatchOp> *ptr) {
RETURN_UNEXPECTED_IF_NULL(ptr);
#ifdef ENABLE_PYTHON #ifdef ENABLE_PYTHON
*ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, *ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_,
builder_num_workers_, builder_in_names_, builder_out_names_, builder_num_workers_, builder_in_names_, builder_out_names_,
@ -106,7 +107,7 @@ Status BatchOp::operator()() {
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
int32_t cur_batch_size = 0; int32_t cur_batch_size = 0;
RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(0, 0, 0))); RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(0, 0, 0)));
while (child_iterator_->eof_handled() == false) { while (child_iterator_->EofHandled() == false) {
while (new_row.empty() == false) { while (new_row.empty() == false) {
table->emplace_back(new_row); table->emplace_back(new_row);
// if # of rows is enough to make 1 batch, send it to worker_queue // if # of rows is enough to make 1 batch, send it to worker_queue
@ -142,7 +143,7 @@ Status BatchOp::operator()() {
<< "reduce memory usage."; << "reduce memory usage.";
} }
#endif #endif
} // end of eof_handled() == false } // end of EofHandled() == false
RETURN_IF_NOT_OK( RETURN_IF_NOT_OK(
worker_queues_[cnt++ % num_workers_]->EmplaceBack(std::make_pair(nullptr, CBatchInfo(batchCtrl::kEOF)))); worker_queues_[cnt++ % num_workers_]->EmplaceBack(std::make_pair(nullptr, CBatchInfo(batchCtrl::kEOF))));
// EOF received, send quit signal to all workers // EOF received, send quit signal to all workers
@ -168,6 +169,8 @@ void BatchOp::Print(std::ostream &out, bool show_all) const {
} }
Status BatchOp::BatchRows(const std::unique_ptr<TensorQTable> *src, TensorRow *dest, dsize_t batch_size) { Status BatchOp::BatchRows(const std::unique_ptr<TensorQTable> *src, TensorRow *dest, dsize_t batch_size) {
RETURN_UNEXPECTED_IF_NULL(src);
RETURN_UNEXPECTED_IF_NULL(dest);
if ((*src)->size() != batch_size) { if ((*src)->size() != batch_size) {
RETURN_STATUS_UNEXPECTED("[Internal ERROR] Source table size does not match the batch_size."); RETURN_STATUS_UNEXPECTED("[Internal ERROR] Source table size does not match the batch_size.");
} }
@ -274,6 +277,8 @@ Status BatchOp::EoeReceived(int32_t) {
#ifdef ENABLE_PYTHON #ifdef ENABLE_PYTHON
Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair) { Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair) {
RETURN_UNEXPECTED_IF_NULL(table_pair);
RETURN_UNEXPECTED_IF_NULL(table_pair->first);
std::unique_ptr<TensorQTable> in_q_table = std::move(table_pair->first); std::unique_ptr<TensorQTable> in_q_table = std::move(table_pair->first);
size_t num_rows = in_q_table->size(); size_t num_rows = in_q_table->size();
auto out_q_table = std::make_unique<TensorQTable>(num_rows, TensorRow(column_name_id_map_.size(), nullptr)); auto out_q_table = std::make_unique<TensorQTable>(num_rows, TensorRow(column_name_id_map_.size(), nullptr));
@ -316,6 +321,7 @@ Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>
#endif #endif
Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) { Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) {
RETURN_UNEXPECTED_IF_NULL(batch_size);
#ifdef ENABLE_PYTHON #ifdef ENABLE_PYTHON
if (batch_size_func_) { if (batch_size_func_) {
RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info)); RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info));
@ -330,6 +336,7 @@ Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) {
#ifdef ENABLE_PYTHON #ifdef ENABLE_PYTHON
Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) {
RETURN_UNEXPECTED_IF_NULL(batch_size);
{ {
// Acquire Python GIL // Acquire Python GIL
py::gil_scoped_acquire gil_acquire; py::gil_scoped_acquire gil_acquire;
@ -355,6 +362,8 @@ Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) {
} }
Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info) { Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info) {
RETURN_UNEXPECTED_IF_NULL(input);
RETURN_UNEXPECTED_IF_NULL(output);
{ {
// Acquire Python GIL // Acquire Python GIL
py::gil_scoped_acquire gil_acquire; py::gil_scoped_acquire gil_acquire;
@ -471,6 +480,9 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info,
const std::unordered_map<std::string, int32_t> &column_name_id_map, const std::unordered_map<std::string, int32_t> &column_name_id_map,
std::set<int32_t> *pad_cols, std::vector<std::shared_ptr<Tensor>> *pad_vals, std::set<int32_t> *pad_cols, std::vector<std::shared_ptr<Tensor>> *pad_vals,
std::vector<std::vector<dsize_t>> *pad_shapes) { std::vector<std::vector<dsize_t>> *pad_shapes) {
RETURN_UNEXPECTED_IF_NULL(pad_cols);
RETURN_UNEXPECTED_IF_NULL(pad_vals);
RETURN_UNEXPECTED_IF_NULL(pad_shapes);
if (pad_info.empty()) { // if pad_info empty, pad every columns automatically if (pad_info.empty()) { // if pad_info empty, pad every columns automatically
for (size_t col_id = 0; col_id < column_name_id_map.size(); col_id++) { for (size_t col_id = 0; col_id < column_name_id_map.size(); col_id++) {
pad_cols->insert(col_id); pad_cols->insert(col_id);
@ -561,6 +573,7 @@ int64_t BatchOp::GetTreeBatchSize() {
} }
Status BatchOp::GetNextRowPullMode(TensorRow *const row) { Status BatchOp::GetNextRowPullMode(TensorRow *const row) {
RETURN_UNEXPECTED_IF_NULL(row);
std::unique_ptr<TensorQTable> table = std::make_unique<TensorQTable>(); std::unique_ptr<TensorQTable> table = std::make_unique<TensorQTable>();
child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0); child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0);
int32_t cur_batch_size = 0; int32_t cur_batch_size = 0;

View File

@ -60,7 +60,7 @@ Status BucketBatchByLengthOp::operator()() {
TensorRow current_row; TensorRow current_row;
child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0); child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0);
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&current_row)); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&current_row));
while (!child_iterator_->eof_handled()) { while (!child_iterator_->EofHandled()) {
while (!current_row.empty()) { while (!current_row.empty()) {
int32_t element_length; int32_t element_length;
RETURN_IF_NOT_OK(ObtainElementLength(&element_length, current_row)); RETURN_IF_NOT_OK(ObtainElementLength(&element_length, current_row));
@ -99,6 +99,7 @@ Status BucketBatchByLengthOp::operator()() {
} }
Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, TensorRow element) { Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, TensorRow element) {
RETURN_UNEXPECTED_IF_NULL(out_element_length);
// call pyfunc here if given pyfunc, otherwise return 0th dimension of shape of // call pyfunc here if given pyfunc, otherwise return 0th dimension of shape of
// the single column specified in length_dependent_columns_ // the single column specified in length_dependent_columns_
if (element_length_function_) { if (element_length_function_) {

View File

@ -52,7 +52,7 @@ Status BuildSentencePieceVocabOp::operator()() {
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
bool eoe_warning = false; // give out warning if receive more than 1 eoe bool eoe_warning = false; // give out warning if receive more than 1 eoe
while (child_iterator_->eof_handled() == false) { while (child_iterator_->EofHandled() == false) {
while (new_row.empty() == false) { while (new_row.empty() == false) {
RETURN_IF_NOT_OK(sentence_queue_->EmplaceBack(new_row)); RETURN_IF_NOT_OK(sentence_queue_->EmplaceBack(new_row));
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));

View File

@ -107,7 +107,7 @@ Status BuildVocabOp::operator()() {
} }
} }
bool eoe_warning = false; // give out warning if receive more than 1 eoe bool eoe_warning = false; // give out warning if receive more than 1 eoe
while (child_iterator_->eof_handled() == false) { while (child_iterator_->EofHandled() == false) {
while (new_row.empty() == false) { while (new_row.empty() == false) {
RETURN_IF_NOT_OK(distributor_queue_->EmplaceBack(new_row)); RETURN_IF_NOT_OK(distributor_queue_->EmplaceBack(new_row));
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));

View File

@ -593,7 +593,7 @@ Status DeviceQueueOp::SendDataToCPU() {
MS_LOG(INFO) << "Device queue, sending data to CPU."; MS_LOG(INFO) << "Device queue, sending data to CPU.";
int64_t total_batch = 0; int64_t total_batch = 0;
while (!(child_iterator_->eof_handled())) { while (!(child_iterator_->EofHandled())) {
TensorRow curr_row; TensorRow curr_row;
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&curr_row)); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&curr_row));

View File

@ -62,7 +62,7 @@ Status FilterOp::operator()() {
TensorRow new_row; TensorRow new_row;
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
int64_t cnt = 0; int64_t cnt = 0;
while (child_iterator_->eof_handled() == false) { while (child_iterator_->EofHandled() == false) {
while (new_row.empty() == false) { while (new_row.empty() == false) {
RETURN_IF_NOT_OK(worker_queues_[cnt % num_workers_]->EmplaceBack(new_row)); RETURN_IF_NOT_OK(worker_queues_[cnt % num_workers_]->EmplaceBack(new_row));
cnt++; cnt++;

View File

@ -124,7 +124,7 @@ Status ShuffleOp::operator()() {
RETURN_IF_NOT_OK(InitShuffleBuffer()); RETURN_IF_NOT_OK(InitShuffleBuffer());
// This is our main loop exit condition, when the iterator has no more data completely. // This is our main loop exit condition, when the iterator has no more data completely.
if (child_iterator_->eof_handled()) { if (child_iterator_->EofHandled()) {
RETURN_IF_NOT_OK(out_connector_->SendEOF()); RETURN_IF_NOT_OK(out_connector_->SendEOF());
break; break;
} }
@ -214,7 +214,7 @@ Status ShuffleOp::InitShuffleBuffer() {
TensorRow new_row; TensorRow new_row;
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
if (child_iterator_->eof_handled()) { if (child_iterator_->EofHandled()) {
MS_LOG(DEBUG) << "Shuffle operator init picked up EOF. No more epochs."; MS_LOG(DEBUG) << "Shuffle operator init picked up EOF. No more epochs.";
RETURN_IF_NOT_OK(out_connector_->SendEOF()); RETURN_IF_NOT_OK(out_connector_->SendEOF());
return Status::OK(); return Status::OK();

View File

@ -43,7 +43,7 @@ AlbumOp::AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, boo
curr_row_(0) { curr_row_(0) {
// Set the column name map (base class field) // Set the column name map (base class field)
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
column_name_id_map_[data_schema_->column(i).name()] = i; column_name_id_map_[data_schema_->Column(i).Name()] = i;
} }
io_block_queues_.Init(num_workers_, queue_size); io_block_queues_.Init(num_workers_, queue_size);
} }
@ -70,8 +70,8 @@ Status AlbumOp::PrescanEntry() {
} }
MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << "."; MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << ".";
while (dirItr->hasNext()) { while (dirItr->HasNext()) {
Path file = dirItr->next(); Path file = dirItr->Next();
if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) { if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) {
(void)image_rows_.push_back(file.toString().substr(dirname_offset_)); (void)image_rows_.push_back(file.toString().substr(dirname_offset_));
} else { } else {
@ -192,7 +192,7 @@ Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num
Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
TensorPtr label; TensorPtr label;
// consider templating this function to handle all ints // consider templating this function to handle all ints
if (data_schema_->column(col_num).type() == DataType::DE_INT64) { if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) {
std::vector<int64_t> data; std::vector<int64_t> data;
// Iterate over the integer list and add those values to the output shape tensor // Iterate over the integer list and add those values to the output shape tensor
@ -201,7 +201,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n
(void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); });
RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label)); RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label));
} else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) {
std::vector<int32_t> data; std::vector<int32_t> data;
// Iterate over the integer list and add those values to the output shape tensor // Iterate over the integer list and add those values to the output shape tensor
@ -212,7 +212,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n
RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label)); RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label));
} else { } else {
RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither int32 nor int64, it is " + RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither int32 nor int64, it is " +
data_schema_->column(col_num).type().ToString()); data_schema_->Column(col_num).Type().ToString());
} }
row->push_back(std::move(label)); row->push_back(std::move(label));
return Status::OK(); return Status::OK();
@ -221,7 +221,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n
Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
TensorPtr float_array; TensorPtr float_array;
// consider templating this function to handle all ints // consider templating this function to handle all ints
if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) {
std::vector<double> data; std::vector<double> data;
// Iterate over the integer list and add those values to the output shape tensor // Iterate over the integer list and add those values to the output shape tensor
@ -230,7 +230,7 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col
(void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); });
RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array)); RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array));
} else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) {
std::vector<float> data; std::vector<float> data;
// Iterate over the integer list and add those values to the output shape tensor // Iterate over the integer list and add those values to the output shape tensor
@ -241,14 +241,15 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col
RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array)); RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array));
} else { } else {
RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither float32 nor float64, it is " + RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither float32 nor float64, it is " +
data_schema_->column(col_num).type().ToString()); data_schema_->Column(col_num).Type().ToString());
} }
row->push_back(std::move(float_array)); row->push_back(std::move(float_array));
return Status::OK(); return Status::OK();
} }
Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorRow *row) { Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorRow *row) {
if (data_schema_->column(col_num).type() == DataType::DE_STRING) { RETURN_UNEXPECTED_IF_NULL(row);
if (data_schema_->Column(col_num).Type() == DataType::DE_STRING) {
TensorPtr id; TensorPtr id;
RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(file, &id)); RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(file, &id));
row->push_back(std::move(id)); row->push_back(std::move(id));
@ -266,7 +267,7 @@ Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorRow
Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorRow *row) { Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorRow *row) {
// hack to get the file name without extension, the 1 is to get rid of the backslash character // hack to get the file name without extension, the 1 is to get rid of the backslash character
TensorPtr empty_tensor; TensorPtr empty_tensor;
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), &empty_tensor)); RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->Column(col_num).Type(), &empty_tensor));
row->push_back(std::move(empty_tensor)); row->push_back(std::move(empty_tensor));
return Status::OK(); return Status::OK();
} }
@ -277,11 +278,11 @@ Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorRow *row) {
// only be float32, seems like a weird limitation to impose // only be float32, seems like a weird limitation to impose
Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
TensorPtr float_tensor; TensorPtr float_tensor;
if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) {
double data = json_obj; double data = json_obj;
MS_LOG(INFO) << "double found: " << json_obj << "."; MS_LOG(INFO) << "double found: " << json_obj << ".";
RETURN_IF_NOT_OK(Tensor::CreateScalar<double>(data, &float_tensor)); RETURN_IF_NOT_OK(Tensor::CreateScalar<double>(data, &float_tensor));
} else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) {
float data1 = json_obj; float data1 = json_obj;
RETURN_IF_NOT_OK(Tensor::CreateScalar<float>(data1, &float_tensor)); RETURN_IF_NOT_OK(Tensor::CreateScalar<float>(data1, &float_tensor));
MS_LOG(INFO) << "float found: " << json_obj << "."; MS_LOG(INFO) << "float found: " << json_obj << ".";
@ -293,11 +294,11 @@ Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num,
// Loads a tensor with int value, we have to cast the value to type specified in the schema. // Loads a tensor with int value, we have to cast the value to type specified in the schema.
Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
TensorPtr int_tensor; TensorPtr int_tensor;
if (data_schema_->column(col_num).type() == DataType::DE_INT64) { if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) {
int64_t data = json_obj; int64_t data = json_obj;
MS_LOG(INFO) << "int64 found: " << json_obj << "."; MS_LOG(INFO) << "int64 found: " << json_obj << ".";
RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(data, &int_tensor)); RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(data, &int_tensor));
} else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) {
int32_t data = json_obj; int32_t data = json_obj;
RETURN_IF_NOT_OK(Tensor::CreateScalar<int32_t>(data, &int_tensor)); RETURN_IF_NOT_OK(Tensor::CreateScalar<int32_t>(data, &int_tensor));
MS_LOG(INFO) << "int32 found: " << json_obj << "."; MS_LOG(INFO) << "int32 found: " << json_obj << ".";
@ -349,35 +350,35 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) {
Status AlbumOp::loadColumnData(const std::string &file, int32_t index, nlohmann::json js, TensorRow *row) { Status AlbumOp::loadColumnData(const std::string &file, int32_t index, nlohmann::json js, TensorRow *row) {
int32_t i = index; int32_t i = index;
// special case to handle // special case to handle
if (data_schema_->column(i).name() == "id") { if (data_schema_->Column(i).Name() == "id") {
// id is internal, special case to load from file // id is internal, special case to load from file
return LoadIDTensor(file, i, row); return LoadIDTensor(file, i, row);
} }
// find if key does not exist, insert placeholder nullptr if not found // find if key does not exist, insert placeholder nullptr if not found
if (js.find(data_schema_->column(i).name()) == js.end()) { if (js.find(data_schema_->Column(i).Name()) == js.end()) {
// iterator not found, push nullptr as placeholder // iterator not found, push nullptr as placeholder
MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << "."; MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->Column(i).Name() << ".";
return LoadEmptyTensor(i, row); return LoadEmptyTensor(i, row);
} }
nlohmann::json column_value = js.at(data_schema_->column(i).name()); nlohmann::json column_value = js.at(data_schema_->Column(i).Name());
MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << "."; MS_LOG(INFO) << "This column is: " << data_schema_->Column(i).Name() << ".";
bool is_array = column_value.is_array(); bool is_array = column_value.is_array();
// load single string // load single string
if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) { if (column_value.is_string() && data_schema_->Column(i).Type() == DataType::DE_STRING) {
return LoadStringTensor(column_value, i, row); return LoadStringTensor(column_value, i, row);
} }
// load string array // load string array
if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) { if (is_array && data_schema_->Column(i).Type() == DataType::DE_STRING) {
return LoadStringArrayTensor(column_value, i, row); return LoadStringArrayTensor(column_value, i, row);
} }
// load image file // load image file
if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) { if (column_value.is_string() && data_schema_->Column(i).Type() != DataType::DE_STRING) {
std::string image_file_path = column_value; std::string image_file_path = column_value;
return LoadImageTensor(image_file_path, i, row); return LoadImageTensor(image_file_path, i, row);
} }
// load float value // load float value
bool judge_float = (data_schema_->column(i).type() == DataType::DE_FLOAT32) || bool judge_float = (data_schema_->Column(i).Type() == DataType::DE_FLOAT32) ||
(data_schema_->column(i).type() == DataType::DE_FLOAT64); (data_schema_->Column(i).Type() == DataType::DE_FLOAT64);
if (!is_array && judge_float) { if (!is_array && judge_float) {
return LoadFloatTensor(column_value, i, row); return LoadFloatTensor(column_value, i, row);
} }
@ -387,15 +388,15 @@ Status AlbumOp::loadColumnData(const std::string &file, int32_t index, nlohmann:
} }
// int value // int value
if (!is_array && if (!is_array &&
(data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) { (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) {
return LoadIntTensor(column_value, i, row); return LoadIntTensor(column_value, i, row);
} }
// int array // int array
if (is_array && if (is_array &&
(data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) { (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) {
return LoadIntArrayTensor(column_value, i, row); return LoadIntArrayTensor(column_value, i, row);
} else { } else {
MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported."; MS_LOG(WARNING) << "Value type for column: " << data_schema_->Column(i).Name() << " is not supported.";
return Status::OK(); return Status::OK();
} }
} }
@ -438,7 +439,7 @@ Status AlbumOp::ComputeColMap() {
// Set the column name map (base class field) // Set the column name map (base class field)
if (column_name_id_map_.empty()) { if (column_name_id_map_.empty()) {
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
column_name_id_map_[data_schema_->column(i).name()] = i; column_name_id_map_[data_schema_->Column(i).Name()] = i;
} }
} else { } else {
MS_LOG(WARNING) << "Column name map is already set!"; MS_LOG(WARNING) << "Column name map is already set!";

View File

@ -258,7 +258,7 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, TensorRow *row) {
} }
RETURN_IF_NOT_OK( RETURN_IF_NOT_OK(
Tensor::CreateEmpty(TensorShape({1, (uint32_t)image_label.second.size()}), data_schema_->column(1).type(), &label)); Tensor::CreateEmpty(TensorShape({1, (uint32_t)image_label.second.size()}), data_schema_->Column(1).Type(), &label));
RETURN_IF_NOT_OK(label->Zero()); RETURN_IF_NOT_OK(label->Zero());
for (uint32_t index = 0; index < image_label.second.size(); index++) { for (uint32_t index = 0; index < image_label.second.size(); index++) {
if (image_label.second[index] == 1) { if (image_label.second[index] == 1) {
@ -294,7 +294,7 @@ Status CelebAOp::ComputeColMap() {
// Set the column name map (base class field) // Set the column name map (base class field)
if (column_name_id_map_.empty()) { if (column_name_id_map_.empty()) {
for (int32_t index = 0; index < data_schema_->NumColumns(); index++) { for (int32_t index = 0; index < data_schema_->NumColumns(); index++) {
column_name_id_map_[data_schema_->column(index).name()] = index; column_name_id_map_[data_schema_->Column(index).Name()] = index;
} }
} else { } else {
MS_LOG(WARNING) << "Column name map is already set!"; MS_LOG(WARNING) << "Column name map is already set!";

View File

@ -205,8 +205,8 @@ Status CifarOp::GetCifarFiles() {
Path dir_path(folder_path_); Path dir_path(folder_path_);
auto dirIt = Path::DirIterator::OpenDirectory(&dir_path); auto dirIt = Path::DirIterator::OpenDirectory(&dir_path);
if (dirIt) { if (dirIt) {
while (dirIt->hasNext()) { while (dirIt->HasNext()) {
Path file = dirIt->next(); Path file = dirIt->Next();
if (file.Extension() == kExtension) { if (file.Extension() == kExtension) {
cifar_files_.push_back(file.toString()); cifar_files_.push_back(file.toString());
} }
@ -236,7 +236,7 @@ Status CifarOp::ParseCifarData() {
std::shared_ptr<Tensor> image_tensor; std::shared_ptr<Tensor> image_tensor;
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}), RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}),
data_schema_->column(0).type(), &image_tensor)); data_schema_->Column(0).Type(), &image_tensor));
auto itr = image_tensor->begin<uint8_t>(); auto itr = image_tensor->begin<uint8_t>();
uint32_t total_pix = kCifarImageHeight * kCifarImageWidth; uint32_t total_pix = kCifarImageHeight * kCifarImageWidth;
for (uint32_t pix = 0; pix < total_pix; ++pix) { for (uint32_t pix = 0; pix < total_pix; ++pix) {
@ -369,7 +369,7 @@ Status CifarOp::ComputeColMap() {
// set the column name map (base class field) // set the column name map (base class field)
if (column_name_id_map_.empty()) { if (column_name_id_map_.empty()) {
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
column_name_id_map_[data_schema_->column(i).name()] = i; column_name_id_map_[data_schema_->Column(i).Name()] = i;
} }
} else { } else {
MS_LOG(WARNING) << "Column name map is already set!"; MS_LOG(WARNING) << "Column name map is already set!";

View File

@ -86,7 +86,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
} }
std::string kImageFile = image_folder_path_ + std::string("/") + image_id; std::string kImageFile = image_folder_path_ + std::string("/") + image_id;
RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
auto bboxRow = itr->second; auto bboxRow = itr->second;
std::vector<float> bbox_row; std::vector<float> bbox_row;
@ -505,7 +505,7 @@ Status CocoOp::ComputeColMap() {
// Set the column name map (base class field) // Set the column name map (base class field)
if (column_name_id_map_.empty()) { if (column_name_id_map_.empty()) {
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
column_name_id_map_[data_schema_->column(i).name()] = i; column_name_id_map_[data_schema_->Column(i).Name()] = i;
} }
} else { } else {
MS_LOG(WARNING) << "Column name map is already set!"; MS_LOG(WARNING) << "Column name map is already set!";

View File

@ -240,7 +240,7 @@ Status FlickrOp::ComputeColMap() {
// Set the column name map (base class field) // Set the column name map (base class field)
if (column_name_id_map_.empty()) { if (column_name_id_map_.empty()) {
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
column_name_id_map_[data_schema_->column(i).name()] = i; column_name_id_map_[data_schema_->Column(i).Name()] = i;
} }
} else { } else {
MS_LOG(WARNING) << "Column name map is already set!"; MS_LOG(WARNING) << "Column name map is already set!";

View File

@ -156,8 +156,8 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_name); RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_name);
} }
std::set<std::string> imgs; // use this for ordering std::set<std::string> imgs; // use this for ordering
while (dirItr->hasNext()) { while (dirItr->HasNext()) {
Path file = dirItr->next(); Path file = dirItr->Next();
if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) { if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) {
(void)imgs.insert(file.toString().substr(dirname_offset_)); (void)imgs.insert(file.toString().substr(dirname_offset_));
} else { } else {
@ -182,8 +182,8 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) {
Status ImageFolderOp::RecursiveWalkFolder(Path *dir) { Status ImageFolderOp::RecursiveWalkFolder(Path *dir) {
std::shared_ptr<Path::DirIterator> dir_itr = Path::DirIterator::OpenDirectory(dir); std::shared_ptr<Path::DirIterator> dir_itr = Path::DirIterator::OpenDirectory(dir);
RETURN_UNEXPECTED_IF_NULL(dir_itr); RETURN_UNEXPECTED_IF_NULL(dir_itr);
while (dir_itr->hasNext()) { while (dir_itr->HasNext()) {
Path subdir = dir_itr->next(); Path subdir = dir_itr->Next();
if (subdir.IsDirectory()) { if (subdir.IsDirectory()) {
if (class_index_.empty() || if (class_index_.empty() ||
class_index_.find(subdir.toString().substr(dirname_offset_ + 1)) != class_index_.end()) { class_index_.find(subdir.toString().substr(dirname_offset_ + 1)) != class_index_.end()) {
@ -256,8 +256,8 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se
std::queue<std::string> folder_paths; std::queue<std::string> folder_paths;
std::shared_ptr<Path::DirIterator> dir_itr = Path::DirIterator::OpenDirectory(&dir); std::shared_ptr<Path::DirIterator> dir_itr = Path::DirIterator::OpenDirectory(&dir);
std::unordered_set<std::string> folder_names; std::unordered_set<std::string> folder_names;
while (dir_itr->hasNext()) { while (dir_itr->HasNext()) {
Path subdir = dir_itr->next(); Path subdir = dir_itr->Next();
if (subdir.IsDirectory()) { if (subdir.IsDirectory()) {
folder_paths.push(subdir.toString()); folder_paths.push(subdir.toString());
if (!class_index.empty()) folder_names.insert(subdir.Basename()); if (!class_index.empty()) folder_names.insert(subdir.Basename());
@ -283,7 +283,7 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se
if (subdir.Exists() == false || dir_itr == nullptr) { if (subdir.Exists() == false || dir_itr == nullptr) {
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + subdir.toString()); RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + subdir.toString());
} }
while (dir_itr->hasNext()) { while (dir_itr->HasNext()) {
if (exts.empty() || exts.find(subdir.Extension()) != exts.end()) { if (exts.empty() || exts.find(subdir.Extension()) != exts.end()) {
++row_cnt; ++row_cnt;
} }
@ -298,7 +298,7 @@ Status ImageFolderOp::ComputeColMap() {
// Set the column name map (base class field) // Set the column name map (base class field)
if (column_name_id_map_.empty()) { if (column_name_id_map_.empty()) {
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
column_name_id_map_[data_schema_->column(i).name()] = i; column_name_id_map_[data_schema_->Column(i).Name()] = i;
} }
} else { } else {
MS_LOG(WARNING) << "Column name map is already set!"; MS_LOG(WARNING) << "Column name map is already set!";

View File

@ -274,7 +274,7 @@ Status ManifestOp::ComputeColMap() {
// Set the column name map (base class field) // Set the column name map (base class field)
if (column_name_id_map_.empty()) { if (column_name_id_map_.empty()) {
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
column_name_id_map_[data_schema_->column(i).name()] = i; column_name_id_map_[data_schema_->Column(i).Name()] = i;
} }
} else { } else {
MS_LOG(WARNING) << "Column name map is already set!"; MS_LOG(WARNING) << "Column name map is already set!";

View File

@ -113,7 +113,7 @@ Status MindRecordOp::Init() {
CHECK_FAIL_RETURN_UNEXPECTED( CHECK_FAIL_RETURN_UNEXPECTED(
colname_to_ind.find(colname) != colname_to_ind.end(), colname_to_ind.find(colname) != colname_to_ind.end(),
"Invalid data, specified loading column name: " + colname + " does not exist in data file."); "Invalid data, specified loading column name: " + colname + " does not exist in data file.");
RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->column(colname_to_ind[colname]))); RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->Column(colname_to_ind[colname])));
} }
data_schema_ = std::move(tmp_schema); data_schema_ = std::move(tmp_schema);
} }
@ -271,8 +271,8 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint
} }
std::shared_ptr<Tensor> tensor; std::shared_ptr<Tensor> tensor;
const ColDescriptor &column = data_schema_->column(i_col); const ColDescriptor &column = data_schema_->Column(i_col);
DataType type = column.type(); DataType type = column.Type();
// Set shape // Set shape
CHECK_FAIL_RETURN_UNEXPECTED(column_data_type_size != 0, "Found memory size of column data type is 0."); CHECK_FAIL_RETURN_UNEXPECTED(column_data_type_size != 0, "Found memory size of column data type is 0.");
@ -280,8 +280,8 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint
if (type == DataType::DE_STRING) { if (type == DataType::DE_STRING) {
std::string s{data, data + n_bytes}; std::string s{data, data + n_bytes};
RETURN_IF_NOT_OK(Tensor::CreateScalar(s, &tensor)); RETURN_IF_NOT_OK(Tensor::CreateScalar(s, &tensor));
} else if (column.hasShape()) { } else if (column.HasShape()) {
auto new_shape = TensorShape(column.shape()); auto new_shape = TensorShape(column.Shape());
// if the numpy is null, create empty tensor shape // if the numpy is null, create empty tensor shape
if (num_elements == 0) { if (num_elements == 0) {
new_shape = TensorShape({}); new_shape = TensorShape({});

View File

@ -180,7 +180,7 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la
pixels[m] = (pixels[m] == 0) ? 0 : 255; pixels[m] = (pixels[m] == 0) ? 0 : 255;
} }
std::shared_ptr<Tensor> image; std::shared_ptr<Tensor> image;
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->column(0).type(), RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->Column(0).Type(),
reinterpret_cast<unsigned char *>(pixels), &image)); reinterpret_cast<unsigned char *>(pixels), &image));
image_label_pairs_.emplace_back(std::make_pair(image, labels_buf[j])); image_label_pairs_.emplace_back(std::make_pair(image, labels_buf[j]));
image_path_.push_back(image_names_[index]); image_path_.push_back(image_names_[index]);
@ -225,8 +225,8 @@ Status MnistOp::WalkAllFiles() {
std::string prefix; // empty string, used to match usage = "" (default) or usage == "all" std::string prefix; // empty string, used to match usage = "" (default) or usage == "all"
if (usage_ == "train" || usage_ == "test") prefix = (usage_ == "test" ? test_prefix : train_prefix); if (usage_ == "train" || usage_ == "test") prefix = (usage_ == "test" ? test_prefix : train_prefix);
if (dir_it != nullptr) { if (dir_it != nullptr) {
while (dir_it->hasNext()) { while (dir_it->HasNext()) {
Path file = dir_it->next(); Path file = dir_it->Next();
std::string fname = file.Basename(); // name of the mnist file std::string fname = file.Basename(); // name of the mnist file
if ((fname.find(prefix + "-images") != std::string::npos) && (fname.find(img_ext) != std::string::npos)) { if ((fname.find(prefix + "-images") != std::string::npos) && (fname.find(img_ext) != std::string::npos)) {
image_names_.push_back(file.toString()); image_names_.push_back(file.toString());
@ -307,7 +307,7 @@ Status MnistOp::ComputeColMap() {
// set the column name map (base class field) // set the column name map (base class field)
if (column_name_id_map_.empty()) { if (column_name_id_map_.empty()) {
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
column_name_id_map_[data_schema_->column(i).name()] = i; column_name_id_map_[data_schema_->Column(i).Name()] = i;
} }
} else { } else {
MS_LOG(WARNING) << "Column name map is already set!"; MS_LOG(WARNING) << "Column name map is already set!";

View File

@ -267,8 +267,8 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) {
// Create a tensor for each column, then add the tensor to the row // Create a tensor for each column, then add the tensor to the row
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
const ColDescriptor current_col = data_schema_->column(i); const ColDescriptor current_col = data_schema_->Column(i);
std::vector<dsize_t> current_shape = current_col.shape().AsVector(); std::vector<dsize_t> current_shape = current_col.Shape().AsVector();
std::unique_ptr<TensorShape> new_shape = nullptr; std::unique_ptr<TensorShape> new_shape = nullptr;
std::unique_ptr<unsigned char[]> buf = nullptr; std::unique_ptr<unsigned char[]> buf = nullptr;
std::shared_ptr<Tensor> new_tensor = nullptr; std::shared_ptr<Tensor> new_tensor = nullptr;
@ -282,7 +282,7 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) {
} }
new_shape = std::make_unique<TensorShape>(current_shape); new_shape = std::make_unique<TensorShape>(current_shape);
int64_t size_in_bytes = new_shape->NumOfElements() * current_col.type().SizeInBytes(); int64_t size_in_bytes = new_shape->NumOfElements() * current_col.Type().SizeInBytes();
// Generate a random byte of data. This may cause some funny data for things like doubles, floats, bools // Generate a random byte of data. This may cause some funny data for things like doubles, floats, bools
// however the random data op is not too concerned about the physical data itself. // however the random data op is not too concerned about the physical data itself.
@ -296,7 +296,7 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor."); return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor.");
} }
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.type(), buf.get(), &new_tensor)); RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.Type(), buf.get(), &new_tensor));
// Add this tensor to the tensor row for output // Add this tensor to the tensor row for output
(*new_row).push_back(std::move(new_tensor)); (*new_row).push_back(std::move(new_tensor));

View File

@ -75,7 +75,7 @@ Status SamplerRT::CreateSamplerTensor(std::shared_ptr<Tensor> *sample_ids, int64
col_desc_ = std::make_unique<ColDescriptor>("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1); col_desc_ = std::make_unique<ColDescriptor>("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1);
} }
TensorShape shape(std::vector<dsize_t>(1, num_elements)); TensorShape shape(std::vector<dsize_t>(1, num_elements));
RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, col_desc_->type(), sample_ids)); RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, col_desc_->Type(), sample_ids));
return Status::OK(); return Status::OK();
} }

View File

@ -225,7 +225,7 @@ Status TextFileOp::ComputeColMap() {
// Set the column name mapping (base class field) // Set the column name mapping (base class field)
if (column_name_id_map_.empty()) { if (column_name_id_map_.empty()) {
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
column_name_id_map_[data_schema_->column(i).name()] = i; column_name_id_map_[data_schema_->Column(i).Name()] = i;
} }
} else { } else {
MS_LOG(WARNING) << "Column name map is already set!"; MS_LOG(WARNING) << "Column name map is already set!";

View File

@ -123,7 +123,7 @@ Status TFReaderOp::Init() {
} }
if (total_rows_ == 0) { if (total_rows_ == 0) {
total_rows_ = data_schema_->num_rows(); total_rows_ = data_schema_->NumRows();
} }
if (total_rows_ < 0) { if (total_rows_ < 0) {
RETURN_STATUS_UNEXPECTED( RETURN_STATUS_UNEXPECTED(
@ -332,12 +332,12 @@ Status TFReaderOp::LoadFile(const std::string &filename, int64_t start_offset, i
Status TFReaderOp::LoadExample(const dataengine::Example *tf_file, TensorRow *out_row) { Status TFReaderOp::LoadExample(const dataengine::Example *tf_file, TensorRow *out_row) {
int32_t num_columns = data_schema_->NumColumns(); int32_t num_columns = data_schema_->NumColumns();
for (int32_t col = 0; col < num_columns; ++col) { for (int32_t col = 0; col < num_columns; ++col) {
const ColDescriptor current_col = data_schema_->column(col); const ColDescriptor current_col = data_schema_->Column(col);
const dataengine::Features &example_features = tf_file->features(); const dataengine::Features &example_features = tf_file->features();
const google::protobuf::Map<std::string, dataengine::Feature> &feature_map = example_features.feature(); const google::protobuf::Map<std::string, dataengine::Feature> &feature_map = example_features.feature();
auto iter_column = feature_map.find(current_col.name()); auto iter_column = feature_map.find(current_col.Name());
if (iter_column == feature_map.end()) { if (iter_column == feature_map.end()) {
RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.name() + " does not exist."); RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.Name() + " does not exist.");
} }
const dataengine::Feature &column_values_list = iter_column->second; const dataengine::Feature &column_values_list = iter_column->second;
RETURN_IF_NOT_OK(LoadFeature(out_row, column_values_list, current_col, col)); RETURN_IF_NOT_OK(LoadFeature(out_row, column_values_list, current_col, col));
@ -379,7 +379,7 @@ Status TFReaderOp::LoadFeature(TensorRow *tensor_row, const dataengine::Feature
// into the tensor // into the tensor
TensorShape current_shape = TensorShape::CreateUnknownRankShape(); TensorShape current_shape = TensorShape::CreateUnknownRankShape();
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(num_elements, &current_shape)); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(num_elements, &current_shape));
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(current_shape, current_col.type(), data_ptr, &ts)); RETURN_IF_NOT_OK(Tensor::CreateFromMemory(current_shape, current_col.Type(), data_ptr, &ts));
break; break;
} }
case dataengine::Feature::KindCase::kInt64List: { case dataengine::Feature::KindCase::kInt64List: {
@ -406,10 +406,10 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
// kBytesList can map to the following DE types ONLY! // kBytesList can map to the following DE types ONLY!
// DE_UINT8, DE_INT8 // DE_UINT8, DE_INT8
// Must be single byte type for each element! // Must be single byte type for each element!
if (current_col.type() != DataType::DE_UINT8 && current_col.type() != DataType::DE_INT8 && if (current_col.Type() != DataType::DE_UINT8 && current_col.Type() != DataType::DE_INT8 &&
current_col.type() != DataType::DE_STRING) { current_col.Type() != DataType::DE_STRING) {
std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() +
", data type should be int8, uint8 or string, but got " + current_col.type().ToString(); ", data type should be int8, uint8 or string, but got " + current_col.Type().ToString();
RETURN_STATUS_UNEXPECTED(err_msg); RETURN_STATUS_UNEXPECTED(err_msg);
} }
@ -417,7 +417,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
*num_elements = bytes_list.value_size(); *num_elements = bytes_list.value_size();
if (current_col.type() == DataType::DE_STRING) { if (current_col.Type() == DataType::DE_STRING) {
TensorShape shape = TensorShape::CreateScalar(); TensorShape shape = TensorShape::CreateScalar();
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &shape)); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &shape));
RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, shape, tensor)); RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, shape, tensor));
@ -436,14 +436,14 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
int64_t pad_size = max_size; int64_t pad_size = max_size;
// if user provides a shape in the form of [-1, d1, d2, ... , dn], we need to pad to d1 * d2 * ... * dn // if user provides a shape in the form of [-1, d1, d2, ... , dn], we need to pad to d1 * d2 * ... * dn
if (current_col.hasShape()) { if (current_col.HasShape()) {
TensorShape cur_shape = current_col.shape(); TensorShape cur_shape = current_col.Shape();
if (cur_shape.Size() >= 2 && cur_shape[0] == TensorShape::kDimUnknown) { if (cur_shape.Size() >= 2 && cur_shape[0] == TensorShape::kDimUnknown) {
int64_t new_pad_size = 1; int64_t new_pad_size = 1;
for (int i = 1; i < cur_shape.Size(); ++i) { for (int i = 1; i < cur_shape.Size(); ++i) {
if (cur_shape[i] == TensorShape::kDimUnknown) { if (cur_shape[i] == TensorShape::kDimUnknown) {
std::string err_msg = std::string err_msg =
"Invalid data, more than one unknown dimension in the shape of column: " + current_col.name(); "Invalid data, more than one unknown dimension in the shape of column: " + current_col.Name();
RETURN_STATUS_UNEXPECTED(err_msg); RETURN_STATUS_UNEXPECTED(err_msg);
} }
new_pad_size *= cur_shape[i]; new_pad_size *= cur_shape[i];
@ -451,7 +451,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
pad_size = new_pad_size; pad_size = new_pad_size;
} else { } else {
if (cur_shape.known() && cur_shape.NumOfElements() != max_size) { if (cur_shape.known() && cur_shape.NumOfElements() != max_size) {
std::string err_msg = "Shape in schema's column '" + current_col.name() + "' is incorrect." + std::string err_msg = "Shape in schema's column '" + current_col.Name() + "' is incorrect." +
"\nshape received: " + cur_shape.ToString() + "\nshape received: " + cur_shape.ToString() +
"\ntotal elements in shape received: " + std::to_string(cur_shape.NumOfElements()) + "\ntotal elements in shape received: " + std::to_string(cur_shape.NumOfElements()) +
"\nexpected total elements in shape: " + std::to_string(max_size); "\nexpected total elements in shape: " + std::to_string(max_size);
@ -463,7 +463,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
// know how many elements there are and the total bytes, create tensor here: // know how many elements there are and the total bytes, create tensor here:
TensorShape current_shape = TensorShape::CreateScalar(); TensorShape current_shape = TensorShape::CreateScalar();
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape((*num_elements) * pad_size, &current_shape)); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape((*num_elements) * pad_size, &current_shape));
RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, current_shape, current_col.type(), pad_size, tensor)); RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, current_shape, current_col.Type(), pad_size, tensor));
return Status::OK(); return Status::OK();
} }
@ -472,9 +472,9 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor &current_col, const dataeng
int32_t *num_elements, std::unique_ptr<float[]> *float_array) { int32_t *num_elements, std::unique_ptr<float[]> *float_array) {
// kFloatList can only map to DE types: // kFloatList can only map to DE types:
// DE_FLOAT32 // DE_FLOAT32
if (current_col.type() != DataType::DE_FLOAT32) { if (current_col.Type() != DataType::DE_FLOAT32) {
std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() +
", data type should be string, but got " + current_col.type().ToString(); ", data type should be string, but got " + current_col.Type().ToString();
RETURN_STATUS_UNEXPECTED(err_msg); RETURN_STATUS_UNEXPECTED(err_msg);
} }
@ -494,26 +494,26 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor &current_col, const dataeng
// Determines which template type to use and calls LoadIntList // Determines which template type to use and calls LoadIntList
Status TFReaderOp::LoadIntListSwitch(const ColDescriptor &current_col, const dataengine::Feature &column_values_list, Status TFReaderOp::LoadIntListSwitch(const ColDescriptor &current_col, const dataengine::Feature &column_values_list,
int32_t *num_elements, std::shared_ptr<Tensor> *tensor) { int32_t *num_elements, std::shared_ptr<Tensor> *tensor) {
if (current_col.type() == DataType::DE_UINT64) { if (current_col.Type() == DataType::DE_UINT64) {
RETURN_IF_NOT_OK(LoadIntList<uint64_t>(current_col, column_values_list, num_elements, tensor)); RETURN_IF_NOT_OK(LoadIntList<uint64_t>(current_col, column_values_list, num_elements, tensor));
} else if (current_col.type() == DataType::DE_INT64) { } else if (current_col.Type() == DataType::DE_INT64) {
RETURN_IF_NOT_OK(LoadIntList<int64_t>(current_col, column_values_list, num_elements, tensor)); RETURN_IF_NOT_OK(LoadIntList<int64_t>(current_col, column_values_list, num_elements, tensor));
} else if (current_col.type() == DataType::DE_UINT32) { } else if (current_col.Type() == DataType::DE_UINT32) {
RETURN_IF_NOT_OK(LoadIntList<uint32_t>(current_col, column_values_list, num_elements, tensor)); RETURN_IF_NOT_OK(LoadIntList<uint32_t>(current_col, column_values_list, num_elements, tensor));
} else if (current_col.type() == DataType::DE_INT32) { } else if (current_col.Type() == DataType::DE_INT32) {
RETURN_IF_NOT_OK(LoadIntList<int32_t>(current_col, column_values_list, num_elements, tensor)); RETURN_IF_NOT_OK(LoadIntList<int32_t>(current_col, column_values_list, num_elements, tensor));
} else if (current_col.type() == DataType::DE_UINT16) { } else if (current_col.Type() == DataType::DE_UINT16) {
RETURN_IF_NOT_OK(LoadIntList<uint16_t>(current_col, column_values_list, num_elements, tensor)); RETURN_IF_NOT_OK(LoadIntList<uint16_t>(current_col, column_values_list, num_elements, tensor));
} else if (current_col.type() == DataType::DE_INT16) { } else if (current_col.Type() == DataType::DE_INT16) {
RETURN_IF_NOT_OK(LoadIntList<int16_t>(current_col, column_values_list, num_elements, tensor)); RETURN_IF_NOT_OK(LoadIntList<int16_t>(current_col, column_values_list, num_elements, tensor));
} else if (current_col.type() == DataType::DE_UINT8) { } else if (current_col.Type() == DataType::DE_UINT8) {
RETURN_IF_NOT_OK(LoadIntList<uint8_t>(current_col, column_values_list, num_elements, tensor)); RETURN_IF_NOT_OK(LoadIntList<uint8_t>(current_col, column_values_list, num_elements, tensor));
} else if (current_col.type() == DataType::DE_INT8) { } else if (current_col.Type() == DataType::DE_INT8) {
RETURN_IF_NOT_OK(LoadIntList<int8_t>(current_col, column_values_list, num_elements, tensor)); RETURN_IF_NOT_OK(LoadIntList<int8_t>(current_col, column_values_list, num_elements, tensor));
} else { } else {
std::string err_msg = "Invalid data, invalid datatype for Tensor at column: " + current_col.name() + std::string err_msg = "Invalid data, invalid datatype for Tensor at column: " + current_col.Name() +
", data type should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8" + ", data type should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8" +
", but got " + current_col.type().ToString(); ", but got " + current_col.Type().ToString();
RETURN_STATUS_UNEXPECTED(err_msg); RETURN_STATUS_UNEXPECTED(err_msg);
} }
@ -525,9 +525,9 @@ Status TFReaderOp::LoadIntListSwitch(const ColDescriptor &current_col, const dat
template <typename T> template <typename T>
Status TFReaderOp::LoadIntList(const ColDescriptor &current_col, const dataengine::Feature &column_values_list, Status TFReaderOp::LoadIntList(const ColDescriptor &current_col, const dataengine::Feature &column_values_list,
int32_t *num_elements, std::shared_ptr<Tensor> *tensor) { int32_t *num_elements, std::shared_ptr<Tensor> *tensor) {
if (!(current_col.type().IsInt())) { if (!(current_col.Type().IsInt())) {
std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() +
", data type should be int, but got " + current_col.type().ToString(); ", data type should be int, but got " + current_col.Type().ToString();
RETURN_STATUS_UNEXPECTED(err_msg); RETURN_STATUS_UNEXPECTED(err_msg);
} }
@ -540,7 +540,7 @@ Status TFReaderOp::LoadIntList(const ColDescriptor &current_col, const dataengin
// know how many elements there are, create tensor here: // know how many elements there are, create tensor here:
TensorShape current_shape = TensorShape::CreateUnknownRankShape(); TensorShape current_shape = TensorShape::CreateUnknownRankShape();
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &current_shape)); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &current_shape));
RETURN_IF_NOT_OK(Tensor::CreateEmpty(current_shape, current_col.type(), tensor)); RETURN_IF_NOT_OK(Tensor::CreateEmpty(current_shape, current_col.Type(), tensor));
int64_t i = 0; int64_t i = 0;
auto it = (*tensor)->begin<T>(); auto it = (*tensor)->begin<T>();
@ -719,7 +719,7 @@ Status TFReaderOp::ComputeColMap() {
// Construct the column name map for this operator (base class field) // Construct the column name map for this operator (base class field)
if (column_name_id_map_.empty()) { if (column_name_id_map_.empty()) {
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
column_name_id_map_[data_schema_->column(i).name()] = i; column_name_id_map_[data_schema_->Column(i).Name()] = i;
} }
} else { } else {
MS_LOG(WARNING) << "Column name map is already set!"; MS_LOG(WARNING) << "Column name map is already set!";

View File

@ -83,8 +83,8 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
std::shared_ptr<Tensor> image, target; std::shared_ptr<Tensor> image, target;
const std::string kTargetFile = const std::string kTargetFile =
folder_path_ + std::string(kSegmentationClassFolder) + image_id + std::string(kSegmentationExtension); folder_path_ + std::string(kSegmentationClassFolder) + image_id + std::string(kSegmentationExtension);
RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->column(1), &target)); RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->Column(1), &target));
(*trow) = TensorRow(row_id, {std::move(image), std::move(target)}); (*trow) = TensorRow(row_id, {std::move(image), std::move(target)});
path_list = {kImageFile, kTargetFile}; path_list = {kImageFile, kTargetFile};
} else if (task_type_ == TaskType::Detection) { } else if (task_type_ == TaskType::Detection) {
@ -92,7 +92,7 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
TensorRow annotation; TensorRow annotation;
const std::string kAnnotationFile = const std::string kAnnotationFile =
folder_path_ + std::string(kAnnotationsFolder) + image_id + std::string(kAnnotationExtension); folder_path_ + std::string(kAnnotationsFolder) + image_id + std::string(kAnnotationExtension);
RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, &annotation)); RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, &annotation));
trow->setId(row_id); trow->setId(row_id);
trow->push_back(std::move(image)); trow->push_back(std::move(image));
@ -326,7 +326,7 @@ Status VOCOp::ComputeColMap() {
// Set the column name map (base class field) // Set the column name map (base class field)
if (column_name_id_map_.empty()) { if (column_name_id_map_.empty()) {
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
column_name_id_map_[data_schema_->column(i).name()] = i; column_name_id_map_[data_schema_->Column(i).Name()] = i;
} }
} else { } else {
MS_LOG(WARNING) << "Column name map is already set!"; MS_LOG(WARNING) << "Column name map is already set!";

View File

@ -62,6 +62,7 @@ ExecutionTree::~ExecutionTree() {
// provides it with a link to the tree. A node cannot form any relationships (parent/child) with // provides it with a link to the tree. A node cannot form any relationships (parent/child) with
// other nodes unless they are associated with the same tree. // other nodes unless they are associated with the same tree.
Status ExecutionTree::AssociateNode(const std::shared_ptr<DatasetOp> &op) { Status ExecutionTree::AssociateNode(const std::shared_ptr<DatasetOp> &op) {
RETURN_UNEXPECTED_IF_NULL(op);
// If we are already a part of the tree, no-op // If we are already a part of the tree, no-op
if (op->tree_ == this) { if (op->tree_ == this) {
return Status::OK(); return Status::OK();
@ -88,6 +89,7 @@ Status ExecutionTree::AssociateNode(const std::shared_ptr<DatasetOp> &op) {
// Sets the root node of the tree // Sets the root node of the tree
Status ExecutionTree::AssignRoot(const std::shared_ptr<DatasetOp> &op) { Status ExecutionTree::AssignRoot(const std::shared_ptr<DatasetOp> &op) {
RETURN_UNEXPECTED_IF_NULL(op);
// Tree must be in building state before we can assign root to it // Tree must be in building state before we can assign root to it
if (tree_state_ != kDeTStateBuilding) { if (tree_state_ != kDeTStateBuilding) {
std::string err_msg = std::string err_msg =
@ -121,6 +123,9 @@ void ExecutionTree::Print(std::ostream &out, const std::shared_ptr<DatasetOp> &o
// A helper function for doing the recursive printing // A helper function for doing the recursive printing
void ExecutionTree::PrintNode(std::ostream &out, const std::shared_ptr<DatasetOp> &dataset_op, std::string indent, void ExecutionTree::PrintNode(std::ostream &out, const std::shared_ptr<DatasetOp> &dataset_op, std::string indent,
bool last, bool detailed) const { bool last, bool detailed) const {
if (dataset_op == nullptr) {
return;
}
// Decide which printer to use based on detailed arg. // Decide which printer to use based on detailed arg.
if (!detailed) { if (!detailed) {
out << indent << "+- " << *dataset_op; out << indent << "+- " << *dataset_op;

View File

@ -41,6 +41,7 @@ GraphDataImpl::GraphDataImpl(std::string dataset_file, int32_t num_workers, bool
GraphDataImpl::~GraphDataImpl() {} GraphDataImpl::~GraphDataImpl() {}
Status GraphDataImpl::GetAllNodes(NodeType node_type, std::shared_ptr<Tensor> *out) { Status GraphDataImpl::GetAllNodes(NodeType node_type, std::shared_ptr<Tensor> *out) {
RETURN_UNEXPECTED_IF_NULL(out);
auto itr = node_type_map_.find(node_type); auto itr = node_type_map_.find(node_type);
if (itr == node_type_map_.end()) { if (itr == node_type_map_.end()) {
std::string err_msg = "Invalid node type:" + std::to_string(node_type); std::string err_msg = "Invalid node type:" + std::to_string(node_type);
@ -54,6 +55,7 @@ Status GraphDataImpl::GetAllNodes(NodeType node_type, std::shared_ptr<Tensor> *o
template <typename T> template <typename T>
Status GraphDataImpl::CreateTensorByVector(const std::vector<std::vector<T>> &data, DataType type, Status GraphDataImpl::CreateTensorByVector(const std::vector<std::vector<T>> &data, DataType type,
std::shared_ptr<Tensor> *out) { std::shared_ptr<Tensor> *out) {
RETURN_UNEXPECTED_IF_NULL(out);
if (!type.IsCompatible<T>()) { if (!type.IsCompatible<T>()) {
RETURN_STATUS_UNEXPECTED("Data type not compatible"); RETURN_STATUS_UNEXPECTED("Data type not compatible");
} }
@ -96,6 +98,7 @@ Status GraphDataImpl::ComplementVector(std::vector<std::vector<T>> *data, size_t
} }
Status GraphDataImpl::GetAllEdges(EdgeType edge_type, std::shared_ptr<Tensor> *out) { Status GraphDataImpl::GetAllEdges(EdgeType edge_type, std::shared_ptr<Tensor> *out) {
RETURN_UNEXPECTED_IF_NULL(out);
auto itr = edge_type_map_.find(edge_type); auto itr = edge_type_map_.find(edge_type);
if (itr == edge_type_map_.end()) { if (itr == edge_type_map_.end()) {
std::string err_msg = "Invalid edge type:" + std::to_string(edge_type); std::string err_msg = "Invalid edge type:" + std::to_string(edge_type);
@ -110,6 +113,7 @@ Status GraphDataImpl::GetNodesFromEdges(const std::vector<EdgeIdType> &edge_list
if (edge_list.empty()) { if (edge_list.empty()) {
RETURN_STATUS_UNEXPECTED("Input edge_list is empty"); RETURN_STATUS_UNEXPECTED("Input edge_list is empty");
} }
RETURN_UNEXPECTED_IF_NULL(out);
std::vector<std::vector<NodeIdType>> node_list; std::vector<std::vector<NodeIdType>> node_list;
node_list.reserve(edge_list.size()); node_list.reserve(edge_list.size());
@ -156,6 +160,7 @@ Status GraphDataImpl::GetAllNeighbors(const std::vector<NodeIdType> &node_list,
const OutputFormat &format, std::shared_ptr<Tensor> *out) { const OutputFormat &format, std::shared_ptr<Tensor> *out) {
CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty.");
RETURN_IF_NOT_OK(CheckNeighborType(neighbor_type)); RETURN_IF_NOT_OK(CheckNeighborType(neighbor_type));
RETURN_UNEXPECTED_IF_NULL(out);
std::vector<std::vector<NodeIdType>> neighbors; std::vector<std::vector<NodeIdType>> neighbors;
@ -251,6 +256,7 @@ Status GraphDataImpl::GetSampledNeighbors(const std::vector<NodeIdType> &node_li
for (const auto &type : neighbor_types) { for (const auto &type : neighbor_types) {
RETURN_IF_NOT_OK(CheckNeighborType(type)); RETURN_IF_NOT_OK(CheckNeighborType(type));
} }
RETURN_UNEXPECTED_IF_NULL(out);
std::vector<std::vector<NodeIdType>> neighbors_vec(node_list.size()); std::vector<std::vector<NodeIdType>> neighbors_vec(node_list.size());
for (size_t node_idx = 0; node_idx < node_list.size(); ++node_idx) { for (size_t node_idx = 0; node_idx < node_list.size(); ++node_idx) {
std::shared_ptr<Node> input_node; std::shared_ptr<Node> input_node;
@ -285,6 +291,7 @@ Status GraphDataImpl::NegativeSample(const std::vector<NodeIdType> &data, const
size_t *start_index, const std::unordered_set<NodeIdType> &exclude_data, size_t *start_index, const std::unordered_set<NodeIdType> &exclude_data,
int32_t samples_num, std::vector<NodeIdType> *out_samples) { int32_t samples_num, std::vector<NodeIdType> *out_samples) {
CHECK_FAIL_RETURN_UNEXPECTED(!data.empty(), "Input data is empty."); CHECK_FAIL_RETURN_UNEXPECTED(!data.empty(), "Input data is empty.");
RETURN_UNEXPECTED_IF_NULL(start_index);
size_t index = *start_index; size_t index = *start_index;
for (size_t i = index; i < shuffled_ids.size(); ++i) { for (size_t i = index; i < shuffled_ids.size(); ++i) {
++index; ++index;
@ -305,6 +312,7 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector<NodeIdType> &node
CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty.");
RETURN_IF_NOT_OK(CheckSamplesNum(samples_num)); RETURN_IF_NOT_OK(CheckSamplesNum(samples_num));
RETURN_IF_NOT_OK(CheckNeighborType(neg_neighbor_type)); RETURN_IF_NOT_OK(CheckNeighborType(neg_neighbor_type));
RETURN_UNEXPECTED_IF_NULL(out);
const std::vector<NodeIdType> &all_nodes = node_type_map_[neg_neighbor_type]; const std::vector<NodeIdType> &all_nodes = node_type_map_[neg_neighbor_type];
std::vector<NodeIdType> shuffled_id(all_nodes.size()); std::vector<NodeIdType> shuffled_id(all_nodes.size());
@ -321,9 +329,9 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector<NodeIdType> &node
std::vector<NodeIdType> neighbors; std::vector<NodeIdType> neighbors;
RETURN_IF_NOT_OK(node->GetAllNeighbors(neg_neighbor_type, &neighbors)); RETURN_IF_NOT_OK(node->GetAllNeighbors(neg_neighbor_type, &neighbors));
std::unordered_set<NodeIdType> exclude_nodes; std::unordered_set<NodeIdType> exclude_nodes;
std::transform(neighbors.begin(), neighbors.end(), (void)std::transform(neighbors.begin(), neighbors.end(),
std::insert_iterator<std::unordered_set<NodeIdType>>(exclude_nodes, exclude_nodes.begin()), std::insert_iterator<std::unordered_set<NodeIdType>>(exclude_nodes, exclude_nodes.begin()),
[](const NodeIdType node) { return node; }); [](const NodeIdType node) { return node; });
neg_neighbors_vec[node_idx].emplace_back(node->id()); neg_neighbors_vec[node_idx].emplace_back(node->id());
if (all_nodes.size() > exclude_nodes.size()) { if (all_nodes.size() > exclude_nodes.size()) {
while (neg_neighbors_vec[node_idx].size() < samples_num + 1) { while (neg_neighbors_vec[node_idx].size() < samples_num + 1) {
@ -355,6 +363,7 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector<NodeIdType> &node
Status GraphDataImpl::RandomWalk(const std::vector<NodeIdType> &node_list, const std::vector<NodeType> &meta_path, Status GraphDataImpl::RandomWalk(const std::vector<NodeIdType> &node_list, const std::vector<NodeType> &meta_path,
float step_home_param, float step_away_param, NodeIdType default_node, float step_home_param, float step_away_param, NodeIdType default_node,
std::shared_ptr<Tensor> *out) { std::shared_ptr<Tensor> *out) {
RETURN_UNEXPECTED_IF_NULL(out);
RETURN_IF_NOT_OK(random_walk_.Build(node_list, meta_path, step_home_param, step_away_param, default_node)); RETURN_IF_NOT_OK(random_walk_.Build(node_list, meta_path, step_home_param, step_away_param, default_node));
std::vector<std::vector<NodeIdType>> walks; std::vector<std::vector<NodeIdType>> walks;
RETURN_IF_NOT_OK(random_walk_.SimulateWalk(&walks)); RETURN_IF_NOT_OK(random_walk_.SimulateWalk(&walks));
@ -363,6 +372,7 @@ Status GraphDataImpl::RandomWalk(const std::vector<NodeIdType> &node_list, const
} }
Status GraphDataImpl::GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) { Status GraphDataImpl::GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) {
RETURN_UNEXPECTED_IF_NULL(out_feature);
auto itr = default_node_feature_map_.find(feature_type); auto itr = default_node_feature_map_.find(feature_type);
if (itr == default_node_feature_map_.end()) { if (itr == default_node_feature_map_.end()) {
std::string err_msg = "Invalid feature type:" + std::to_string(feature_type); std::string err_msg = "Invalid feature type:" + std::to_string(feature_type);
@ -374,6 +384,7 @@ Status GraphDataImpl::GetNodeDefaultFeature(FeatureType feature_type, std::share
} }
Status GraphDataImpl::GetEdgeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) { Status GraphDataImpl::GetEdgeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) {
RETURN_UNEXPECTED_IF_NULL(out_feature);
auto itr = default_edge_feature_map_.find(feature_type); auto itr = default_edge_feature_map_.find(feature_type);
if (itr == default_edge_feature_map_.end()) { if (itr == default_edge_feature_map_.end()) {
std::string err_msg = "Invalid feature type:" + std::to_string(feature_type); std::string err_msg = "Invalid feature type:" + std::to_string(feature_type);
@ -390,6 +401,7 @@ Status GraphDataImpl::GetNodeFeature(const std::shared_ptr<Tensor> &nodes,
RETURN_STATUS_UNEXPECTED("Input nodes is empty"); RETURN_STATUS_UNEXPECTED("Input nodes is empty");
} }
CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty"); CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty");
RETURN_UNEXPECTED_IF_NULL(out);
TensorRow tensors; TensorRow tensors;
for (const auto &f_type : feature_types) { for (const auto &f_type : feature_types) {
std::shared_ptr<Feature> default_feature; std::shared_ptr<Feature> default_feature;
@ -436,6 +448,7 @@ Status GraphDataImpl::GetNodeFeatureSharedMemory(const std::shared_ptr<Tensor> &
if (!nodes || nodes->Size() == 0) { if (!nodes || nodes->Size() == 0) {
RETURN_STATUS_UNEXPECTED("Input nodes is empty"); RETURN_STATUS_UNEXPECTED("Input nodes is empty");
} }
RETURN_UNEXPECTED_IF_NULL(out);
TensorShape shape = nodes->shape().AppendDim(2); TensorShape shape = nodes->shape().AppendDim(2);
std::shared_ptr<Tensor> fea_tensor; std::shared_ptr<Tensor> fea_tensor;
RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, DataType(DataType::DE_INT64), &fea_tensor)); RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, DataType(DataType::DE_INT64), &fea_tensor));
@ -478,6 +491,7 @@ Status GraphDataImpl::GetEdgeFeature(const std::shared_ptr<Tensor> &edges,
RETURN_STATUS_UNEXPECTED("Input edges is empty"); RETURN_STATUS_UNEXPECTED("Input edges is empty");
} }
CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty"); CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty");
RETURN_UNEXPECTED_IF_NULL(out);
TensorRow tensors; TensorRow tensors;
for (const auto &f_type : feature_types) { for (const auto &f_type : feature_types) {
std::shared_ptr<Feature> default_feature; std::shared_ptr<Feature> default_feature;
@ -520,6 +534,7 @@ Status GraphDataImpl::GetEdgeFeatureSharedMemory(const std::shared_ptr<Tensor> &
if (!edges || edges->Size() == 0) { if (!edges || edges->Size() == 0) {
RETURN_STATUS_UNEXPECTED("Input edges is empty"); RETURN_STATUS_UNEXPECTED("Input edges is empty");
} }
RETURN_UNEXPECTED_IF_NULL(out);
TensorShape shape = edges->shape().AppendDim(2); TensorShape shape = edges->shape().AppendDim(2);
std::shared_ptr<Tensor> fea_tensor; std::shared_ptr<Tensor> fea_tensor;
RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, DataType(DataType::DE_INT64), &fea_tensor)); RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, DataType(DataType::DE_INT64), &fea_tensor));
@ -554,14 +569,15 @@ Status GraphDataImpl::Init() {
} }
Status GraphDataImpl::GetMetaInfo(MetaInfo *meta_info) { Status GraphDataImpl::GetMetaInfo(MetaInfo *meta_info) {
RETURN_UNEXPECTED_IF_NULL(meta_info);
meta_info->node_type.resize(node_type_map_.size()); meta_info->node_type.resize(node_type_map_.size());
std::transform(node_type_map_.begin(), node_type_map_.end(), meta_info->node_type.begin(), (void)std::transform(node_type_map_.begin(), node_type_map_.end(), meta_info->node_type.begin(),
[](auto itr) { return itr.first; }); [](auto itr) { return itr.first; });
std::sort(meta_info->node_type.begin(), meta_info->node_type.end()); std::sort(meta_info->node_type.begin(), meta_info->node_type.end());
meta_info->edge_type.resize(edge_type_map_.size()); meta_info->edge_type.resize(edge_type_map_.size());
std::transform(edge_type_map_.begin(), edge_type_map_.end(), meta_info->edge_type.begin(), (void)std::transform(edge_type_map_.begin(), edge_type_map_.end(), meta_info->edge_type.begin(),
[](auto itr) { return itr.first; }); [](auto itr) { return itr.first; });
std::sort(meta_info->edge_type.begin(), meta_info->edge_type.end()); std::sort(meta_info->edge_type.begin(), meta_info->edge_type.end());
for (const auto &node : node_type_map_) { for (const auto &node : node_type_map_) {
@ -594,6 +610,7 @@ Status GraphDataImpl::GetMetaInfo(MetaInfo *meta_info) {
#ifdef ENABLE_PYTHON #ifdef ENABLE_PYTHON
Status GraphDataImpl::GraphInfo(py::dict *out) { Status GraphDataImpl::GraphInfo(py::dict *out) {
RETURN_UNEXPECTED_IF_NULL(out);
MetaInfo meta_info; MetaInfo meta_info;
RETURN_IF_NOT_OK(GetMetaInfo(&meta_info)); RETURN_IF_NOT_OK(GetMetaInfo(&meta_info));
(*out)["node_type"] = py::cast(meta_info.node_type); (*out)["node_type"] = py::cast(meta_info.node_type);
@ -616,6 +633,7 @@ Status GraphDataImpl::LoadNodeAndEdge() {
} }
Status GraphDataImpl::GetNodeByNodeId(NodeIdType id, std::shared_ptr<Node> *node) { Status GraphDataImpl::GetNodeByNodeId(NodeIdType id, std::shared_ptr<Node> *node) {
RETURN_UNEXPECTED_IF_NULL(node);
auto itr = node_id_map_.find(id); auto itr = node_id_map_.find(id);
if (itr == node_id_map_.end()) { if (itr == node_id_map_.end()) {
std::string err_msg = "Invalid node id:" + std::to_string(id); std::string err_msg = "Invalid node id:" + std::to_string(id);
@ -627,6 +645,7 @@ Status GraphDataImpl::GetNodeByNodeId(NodeIdType id, std::shared_ptr<Node> *node
} }
Status GraphDataImpl::GetEdgeByEdgeId(EdgeIdType id, std::shared_ptr<Edge> *edge) { Status GraphDataImpl::GetEdgeByEdgeId(EdgeIdType id, std::shared_ptr<Edge> *edge) {
RETURN_UNEXPECTED_IF_NULL(edge);
auto itr = edge_id_map_.find(id); auto itr = edge_id_map_.find(id);
if (itr == edge_id_map_.end()) { if (itr == edge_id_map_.end()) {
std::string err_msg = "Invalid edge id:" + std::to_string(id); std::string err_msg = "Invalid edge id:" + std::to_string(id);
@ -682,6 +701,7 @@ Status GraphDataImpl::RandomWalkBase::Build(const std::vector<NodeIdType> &node_
} }
Status GraphDataImpl::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node, std::vector<NodeIdType> *walk_path) { Status GraphDataImpl::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node, std::vector<NodeIdType> *walk_path) {
RETURN_UNEXPECTED_IF_NULL(walk_path);
// Simulate a random walk starting from start node. // Simulate a random walk starting from start node.
auto walk = std::vector<NodeIdType>(1, start_node); // walk is an vector auto walk = std::vector<NodeIdType>(1, start_node); // walk is an vector
// walk simulate // walk simulate
@ -722,6 +742,7 @@ Status GraphDataImpl::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node,
} }
Status GraphDataImpl::RandomWalkBase::SimulateWalk(std::vector<std::vector<NodeIdType>> *walks) { Status GraphDataImpl::RandomWalkBase::SimulateWalk(std::vector<std::vector<NodeIdType>> *walks) {
RETURN_UNEXPECTED_IF_NULL(walks);
for (int32_t i = 0; i < num_walks_; ++i) { for (int32_t i = 0; i < num_walks_; ++i) {
for (const auto &node : node_list_) { for (const auto &node : node_list_) {
std::vector<NodeIdType> walk; std::vector<NodeIdType> walk;
@ -734,6 +755,7 @@ Status GraphDataImpl::RandomWalkBase::SimulateWalk(std::vector<std::vector<NodeI
Status GraphDataImpl::RandomWalkBase::GetNodeProbability(const NodeIdType &node_id, const NodeType &node_type, Status GraphDataImpl::RandomWalkBase::GetNodeProbability(const NodeIdType &node_id, const NodeType &node_type,
std::shared_ptr<StochasticIndex> *node_probability) { std::shared_ptr<StochasticIndex> *node_probability) {
RETURN_UNEXPECTED_IF_NULL(node_probability);
// Generate alias nodes // Generate alias nodes
std::shared_ptr<Node> node; std::shared_ptr<Node> node;
RETURN_IF_NOT_OK(graph_->GetNodeByNodeId(node_id, &node)); RETURN_IF_NOT_OK(graph_->GetNodeByNodeId(node_id, &node));
@ -749,6 +771,7 @@ Status GraphDataImpl::RandomWalkBase::GetNodeProbability(const NodeIdType &node_
Status GraphDataImpl::RandomWalkBase::GetEdgeProbability(const NodeIdType &src, const NodeIdType &dst, Status GraphDataImpl::RandomWalkBase::GetEdgeProbability(const NodeIdType &src, const NodeIdType &dst,
uint32_t meta_path_index, uint32_t meta_path_index,
std::shared_ptr<StochasticIndex> *edge_probability) { std::shared_ptr<StochasticIndex> *edge_probability) {
RETURN_UNEXPECTED_IF_NULL(edge_probability);
// Get the alias edge setup lists for a given edge. // Get the alias edge setup lists for a given edge.
std::shared_ptr<Node> src_node; std::shared_ptr<Node> src_node;
RETURN_IF_NOT_OK(graph_->GetNodeByNodeId(src, &src_node)); RETURN_IF_NOT_OK(graph_->GetNodeByNodeId(src, &src_node));
@ -760,6 +783,8 @@ Status GraphDataImpl::RandomWalkBase::GetEdgeProbability(const NodeIdType &src,
std::vector<NodeIdType> dst_neighbors; std::vector<NodeIdType> dst_neighbors;
RETURN_IF_NOT_OK(dst_node->GetAllNeighbors(meta_path_[meta_path_index + 1], &dst_neighbors, true)); RETURN_IF_NOT_OK(dst_node->GetAllNeighbors(meta_path_[meta_path_index + 1], &dst_neighbors, true));
CHECK_FAIL_RETURN_UNEXPECTED(step_home_param_ != 0, "Invalid data, step home parameter can't be zero.");
CHECK_FAIL_RETURN_UNEXPECTED(step_away_param_ != 0, "Invalid data, step away parameter can't be zero.");
std::sort(dst_neighbors.begin(), dst_neighbors.end()); std::sort(dst_neighbors.begin(), dst_neighbors.end());
std::vector<float> non_normalized_probability; std::vector<float> non_normalized_probability;
for (const auto &dst_nbr : dst_neighbors) { for (const auto &dst_nbr : dst_neighbors) {

View File

@ -17,6 +17,8 @@
#include "minddata/dataset/engine/gnn/graph_shared_memory.h" #include "minddata/dataset/engine/gnn/graph_shared_memory.h"
#include <string> #include <string>
#include "debug/common.h"
#include "utils/ms_utils.h"
#include "minddata/dataset/util/log_adapter.h" #include "minddata/dataset/util/log_adapter.h"
namespace mindspore { namespace mindspore {
@ -51,7 +53,9 @@ GraphSharedMemory::~GraphSharedMemory() {
Status GraphSharedMemory::CreateSharedMemory() { Status GraphSharedMemory::CreateSharedMemory() {
if (memory_key_ == -1) { if (memory_key_ == -1) {
// ftok to generate unique key // ftok to generate unique key
memory_key_ = ftok(mr_file_.data(), kGnnSharedMemoryId); auto realpath = Common::GetRealPath(mr_file_);
CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), "Get real path failed, path=" + mr_file_);
memory_key_ = ftok(common::SafeCStr(realpath.value()), kGnnSharedMemoryId);
CHECK_FAIL_RETURN_UNEXPECTED(memory_key_ != -1, "Failed to get key of shared memory. file_name:" + mr_file_); CHECK_FAIL_RETURN_UNEXPECTED(memory_key_ != -1, "Failed to get key of shared memory. file_name:" + mr_file_);
std::stringstream stream; std::stringstream stream;
stream << std::hex << memory_key_; stream << std::hex << memory_key_;
@ -89,6 +93,7 @@ Status GraphSharedMemory::DeleteSharedMemory() {
Status GraphSharedMemory::SharedMemoryImpl(const int &shmflg) { Status GraphSharedMemory::SharedMemoryImpl(const int &shmflg) {
// shmget returns an identifier in shmid // shmget returns an identifier in shmid
CHECK_FAIL_RETURN_UNEXPECTED(memory_size_ >= 0, "Invalid memory size, should be greater than zero.");
int shmid = shmget(memory_key_, memory_size_, shmflg); int shmid = shmget(memory_key_, memory_size_, shmflg);
CHECK_FAIL_RETURN_UNEXPECTED(shmid != -1, "Failed to get shared memory. key=0x" + memory_key_str_); CHECK_FAIL_RETURN_UNEXPECTED(shmid != -1, "Failed to get shared memory. key=0x" + memory_key_str_);
@ -103,6 +108,7 @@ Status GraphSharedMemory::SharedMemoryImpl(const int &shmflg) {
Status GraphSharedMemory::InsertData(const uint8_t *data, int64_t len, int64_t *offset) { Status GraphSharedMemory::InsertData(const uint8_t *data, int64_t len, int64_t *offset) {
CHECK_FAIL_RETURN_UNEXPECTED(data, "Input data is nullptr."); CHECK_FAIL_RETURN_UNEXPECTED(data, "Input data is nullptr.");
CHECK_FAIL_RETURN_UNEXPECTED(len > 0, "Input len is invalid."); CHECK_FAIL_RETURN_UNEXPECTED(len > 0, "Input len is invalid.");
CHECK_FAIL_RETURN_UNEXPECTED(offset, "Input offset is nullptr.");
std::lock_guard<std::mutex> lck(mutex_); std::lock_guard<std::mutex> lck(mutex_);
CHECK_FAIL_RETURN_UNEXPECTED((memory_size_ - memory_offset_ >= len), CHECK_FAIL_RETURN_UNEXPECTED((memory_size_ - memory_offset_ >= len),

View File

@ -46,6 +46,7 @@ class GpuItemConnector : public Connector<std::vector<device::DataItemGpu>> {
} }
Status Pop(int32_t worker_id, std::vector<device::DataItemGpu> *result) noexcept override { Status Pop(int32_t worker_id, std::vector<device::DataItemGpu> *result) noexcept override {
RETURN_UNEXPECTED_IF_NULL(result);
{ {
MS_ASSERT(worker_id < num_consumers_); MS_ASSERT(worker_id < num_consumers_);
std::unique_lock<std::mutex> lock(m_); std::unique_lock<std::mutex> lock(m_);

View File

@ -30,6 +30,7 @@ namespace dataset {
// Helper function to compute a default shuffle size // Helper function to compute a default shuffle size
Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
int64_t *shuffle_size) { int64_t *shuffle_size) {
RETURN_UNEXPECTED_IF_NULL(shuffle_size);
const int64_t average_files_multiplier = 4; const int64_t average_files_multiplier = 4;
const int64_t shuffle_max = 10000; const int64_t shuffle_max = 10000;
int64_t avg_rows_per_file = 0; int64_t avg_rows_per_file = 0;
@ -59,6 +60,7 @@ Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_ro
// Helper function to inject a shuffle operator over top of current operator being built // Helper function to inject a shuffle operator over top of current operator being built
Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
int32_t connector_que_size, std::shared_ptr<DatasetOp> *shuffle_op) { int32_t connector_que_size, std::shared_ptr<DatasetOp> *shuffle_op) {
RETURN_UNEXPECTED_IF_NULL(shuffle_op);
int64_t shuffle_size = 0; int64_t shuffle_size = 0;
RETURN_IF_NOT_OK(ComputeShuffleSize(num_files, num_devices, num_rows, total_rows, &shuffle_size)); RETURN_IF_NOT_OK(ComputeShuffleSize(num_files, num_devices, num_rows, total_rows, &shuffle_size));
MS_LOG(INFO) << "Dataset::AddShuffleOp - num_rows: " << num_rows << ", shuffle_size: " << shuffle_size; MS_LOG(INFO) << "Dataset::AddShuffleOp - num_rows: " << num_rows << ", shuffle_size: " << shuffle_size;

View File

@ -59,6 +59,7 @@ void MapNode::Print(std::ostream &out) const {
} }
Status MapNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) { Status MapNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
RETURN_UNEXPECTED_IF_NULL(node_ops);
std::vector<std::shared_ptr<TensorOp>> tensor_ops; std::vector<std::shared_ptr<TensorOp>> tensor_ops;
// Build tensorOp from tensorOperation vector // Build tensorOp from tensorOperation vector
@ -131,12 +132,16 @@ Status MapNode::ValidateParams() {
// Visitor accepting method for IRNodePass // Visitor accepting method for IRNodePass
// Hands this node to the pass by calling p->Visit() with shared_from_base<MapNode>() as the node argument.
// p: the pass to dispatch to; checked with RETURN_UNEXPECTED_IF_NULL before it is dereferenced.
// modified: flag pointer forwarded as-is to p->Visit(); also checked with RETURN_UNEXPECTED_IF_NULL.
// Returns whatever Status p->Visit() produces.
// NOTE(review): RETURN_UNEXPECTED_IF_NULL is defined outside this view; presumably it returns an
// error Status early when its argument is null - confirm against the macro definition.
Status MapNode::Accept(IRNodePass *const p, bool *const modified) { Status MapNode::Accept(IRNodePass *const p, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(p);
RETURN_UNEXPECTED_IF_NULL(modified);
// Downcast shared pointer then call visitor // Downcast shared pointer then call visitor
return p->Visit(shared_from_base<MapNode>(), modified); return p->Visit(shared_from_base<MapNode>(), modified);
} }
// Visitor accepting method for IRNodePass // Visitor accepting method for IRNodePass
// Counterpart of Accept() that calls p->VisitAfter() instead of p->Visit(), again passing
// shared_from_base<MapNode>() as the node argument.
// p: the pass to dispatch to; checked with RETURN_UNEXPECTED_IF_NULL before it is dereferenced.
// modified: flag pointer forwarded as-is to p->VisitAfter(); also checked with RETURN_UNEXPECTED_IF_NULL.
// Returns whatever Status p->VisitAfter() produces.
// NOTE(review): RETURN_UNEXPECTED_IF_NULL is defined outside this view; presumably it returns an
// error Status early when its argument is null - confirm against the macro definition.
Status MapNode::AcceptAfter(IRNodePass *const p, bool *const modified) { Status MapNode::AcceptAfter(IRNodePass *const p, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(p);
RETURN_UNEXPECTED_IF_NULL(modified);
// Downcast shared pointer then call visitor // Downcast shared pointer then call visitor
return p->VisitAfter(shared_from_base<MapNode>(), modified); return p->VisitAfter(shared_from_base<MapNode>(), modified);
} }
@ -147,6 +152,7 @@ void MapNode::setOperations(const std::vector<std::shared_ptr<TensorOperation>>
// Getter for the node's tensor operations: returns the operations_ member by value
// (the declared return type is a vector, not a reference).
std::vector<std::shared_ptr<TensorOperation>> MapNode::operations() { return operations_; } std::vector<std::shared_ptr<TensorOperation>> MapNode::operations() { return operations_; }
Status MapNode::to_json(nlohmann::json *out_json) { Status MapNode::to_json(nlohmann::json *out_json) {
RETURN_UNEXPECTED_IF_NULL(out_json);
nlohmann::json args; nlohmann::json args;
args["num_parallel_workers"] = num_workers_; args["num_parallel_workers"] = num_workers_;
args["input_columns"] = input_columns_; args["input_columns"] = input_columns_;
@ -160,6 +166,7 @@ Status MapNode::to_json(nlohmann::json *out_json) {
std::vector<nlohmann::json> ops; std::vector<nlohmann::json> ops;
std::vector<int32_t> cbs; std::vector<int32_t> cbs;
for (auto op : operations_) { for (auto op : operations_) {
RETURN_UNEXPECTED_IF_NULL(op);
nlohmann::json op_args; nlohmann::json op_args;
RETURN_IF_NOT_OK(op->to_json(&op_args)); RETURN_IF_NOT_OK(op->to_json(&op_args));
if (op->Name() == "PyFuncOp") { if (op->Name() == "PyFuncOp") {
@ -172,8 +179,8 @@ Status MapNode::to_json(nlohmann::json *out_json) {
} }
} }
args["operations"] = ops; args["operations"] = ops;
std::transform(callbacks_.begin(), callbacks_.end(), std::back_inserter(cbs), (void)std::transform(callbacks_.begin(), callbacks_.end(), std::back_inserter(cbs),
[](std::shared_ptr<DSCallback> cb) -> int32_t { return cb->step_size(); }); [](std::shared_ptr<DSCallback> cb) -> int32_t { return cb != nullptr ? cb->step_size() : 0; });
args["callback"] = cbs; args["callback"] = cbs;
*out_json = args; *out_json = args;
return Status::OK(); return Status::OK();

View File

@ -106,8 +106,8 @@ Status AlbumNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_
} }
std::set<std::string> extensions = {".json", ".JSON"}; std::set<std::string> extensions = {".json", ".JSON"};
while (dirItr->hasNext()) { while (dirItr->HasNext()) {
Path file = dirItr->next(); Path file = dirItr->Next();
if (extensions.empty() || extensions.find(file.Extension()) != extensions.end()) { if (extensions.empty() || extensions.find(file.Extension()) != extensions.end()) {
num_rows += 1; num_rows += 1;
} }

View File

@ -73,9 +73,9 @@ Status GeneratorNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_
RETURN_IF_NOT_OK(data_schema->LoadSchemaString(schema_json_string, {})); RETURN_IF_NOT_OK(data_schema->LoadSchemaString(schema_json_string, {}));
for (int32_t i = 0; i < data_schema->NumColumns(); i++) { for (int32_t i = 0; i < data_schema->NumColumns(); i++) {
ColDescriptor col = data_schema->column(i); ColDescriptor col = data_schema->Column(i);
column_names_.push_back(col.name()); column_names_.push_back(col.Name());
column_types_.push_back((col.type())); column_types_.push_back((col.Type()));
} }
} }
std::shared_ptr<SamplerRT> sampler_rt = nullptr; std::shared_ptr<SamplerRT> sampler_rt = nullptr;

View File

@ -131,7 +131,7 @@ Status RandomNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size
*dataset_size = dataset_size_; *dataset_size = dataset_size_;
return Status::OK(); return Status::OK();
} }
int64_t num_rows = total_rows_ != 0 ? total_rows_ : data_schema_->num_rows(); int64_t num_rows = total_rows_ != 0 ? total_rows_ : data_schema_->NumRows();
*dataset_size = num_rows; *dataset_size = num_rows;
dataset_size_ = *dataset_size; dataset_size_ = *dataset_size;
return Status::OK(); return Status::OK();

View File

@ -22,6 +22,7 @@
#include <utility> #include <utility>
#include <vector> #include <vector>
#include "debug/common.h"
#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" #include "minddata/dataset/engine/datasetops/source/tf_reader_op.h"
#include "minddata/dataset/engine/jagged_connector.h" #include "minddata/dataset/engine/jagged_connector.h"
#include "minddata/dataset/engine/opt/pass.h" #include "minddata/dataset/engine/opt/pass.h"
@ -58,13 +59,9 @@ Status TFRecordNode::ValidateParams() {
} }
for (const auto &f : dataset_files_) { for (const auto &f : dataset_files_) {
Path dataset_file(f); auto realpath = Common::GetRealPath(f);
if (!dataset_file.Exists()) { CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(),
std::string err_msg = "TFRecordNode: dataset file: [" + f + "] is invalid or does not exist."; "TFRecordNode: dataset file: [" + f + "] is invalid or does not exist.");
MS_LOG(ERROR) << err_msg;
return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg);
}
} }
if (num_samples_ < 0) { if (num_samples_ < 0) {
@ -107,6 +104,7 @@ Status TFRecordNode::ValidateParams() {
// Function to build TFRecordNode // Function to build TFRecordNode
Status TFRecordNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) { Status TFRecordNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
RETURN_UNEXPECTED_IF_NULL(node_ops);
// Sort the datasets file in a lexicographical order // Sort the datasets file in a lexicographical order
std::vector<std::string> sorted_dir_files = dataset_files_; std::vector<std::string> sorted_dir_files = dataset_files_;
std::sort(sorted_dir_files.begin(), sorted_dir_files.end()); std::sort(sorted_dir_files.begin(), sorted_dir_files.end());
@ -165,6 +163,8 @@ Status TFRecordNode::GetShardId(int32_t *const shard_id) {
// Get Dataset size // Get Dataset size
Status TFRecordNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate, Status TFRecordNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
int64_t *dataset_size) { int64_t *dataset_size) {
RETURN_UNEXPECTED_IF_NULL(size_getter);
RETURN_UNEXPECTED_IF_NULL(dataset_size);
if (dataset_size_ > 0) { if (dataset_size_ > 0) {
*dataset_size = dataset_size_; *dataset_size = dataset_size_;
return Status::OK(); return Status::OK();
@ -189,6 +189,7 @@ Status TFRecordNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &si
// Get the file list of the specific shard ID // Get the file list of the specific shard ID
Status TFRecordNode::GetShardFileList(std::vector<std::string> *shard_filenames) { Status TFRecordNode::GetShardFileList(std::vector<std::string> *shard_filenames) {
RETURN_UNEXPECTED_IF_NULL(shard_filenames);
if (!shard_filenames->empty()) { if (!shard_filenames->empty()) {
RETURN_STATUS_UNEXPECTED("The initial file list must be empty."); RETURN_STATUS_UNEXPECTED("The initial file list must be empty.");
} }
@ -201,6 +202,7 @@ Status TFRecordNode::GetShardFileList(std::vector<std::string> *shard_filenames)
} }
Status TFRecordNode::to_json(nlohmann::json *out_json) { Status TFRecordNode::to_json(nlohmann::json *out_json) {
RETURN_UNEXPECTED_IF_NULL(out_json);
nlohmann::json args; nlohmann::json args;
args["num_parallel_workers"] = num_workers_; args["num_parallel_workers"] = num_workers_;
args["dataset_files"] = dataset_files_; args["dataset_files"] = dataset_files_;
@ -262,6 +264,7 @@ Status TFRecordNode::from_json(nlohmann::json json_obj, std::shared_ptr<DatasetN
// inherit this sampler from the leaf, providing sampling support from the caching layer. // inherit this sampler from the leaf, providing sampling support from the caching layer.
// That is why we setup the sampler for a leaf node that does not use sampling. // That is why we setup the sampler for a leaf node that does not use sampling.
Status TFRecordNode::SetupSamplerForCache(std::shared_ptr<SamplerObj> *sampler) { Status TFRecordNode::SetupSamplerForCache(std::shared_ptr<SamplerObj> *sampler) {
RETURN_UNEXPECTED_IF_NULL(sampler);
bool shuffle_files = (shuffle_ == ShuffleMode::kGlobal || shuffle_ == ShuffleMode::kFiles); bool shuffle_files = (shuffle_ == ShuffleMode::kGlobal || shuffle_ == ShuffleMode::kFiles);
*sampler = SelectSampler(num_samples_, shuffle_files, num_shards_, shard_id_); *sampler = SelectSampler(num_samples_, shuffle_files, num_shards_, shard_id_);
return Status::OK(); return Status::OK();
@ -281,12 +284,16 @@ Status TFRecordNode::MakeSimpleProducer() {
// Visitor accepting method for IRNodePass // Visitor accepting method for IRNodePass
// Hands this node to the pass by calling p->Visit() with shared_from_base<TFRecordNode>() as the node argument.
// p: the pass to dispatch to; checked with RETURN_UNEXPECTED_IF_NULL before it is dereferenced.
// modified: flag pointer forwarded as-is to p->Visit(); also checked with RETURN_UNEXPECTED_IF_NULL.
// Returns whatever Status p->Visit() produces.
// NOTE(review): RETURN_UNEXPECTED_IF_NULL is defined outside this view; presumably it returns an
// error Status early when its argument is null - confirm against the macro definition.
Status TFRecordNode::Accept(IRNodePass *p, bool *const modified) { Status TFRecordNode::Accept(IRNodePass *p, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(p);
RETURN_UNEXPECTED_IF_NULL(modified);
// Downcast shared pointer then call visitor // Downcast shared pointer then call visitor
return p->Visit(shared_from_base<TFRecordNode>(), modified); return p->Visit(shared_from_base<TFRecordNode>(), modified);
} }
// Visitor accepting method for IRNodePass // Visitor accepting method for IRNodePass
// Counterpart of Accept() that calls p->VisitAfter() instead of p->Visit(), again passing
// shared_from_base<TFRecordNode>() as the node argument.
// p: the pass to dispatch to; checked with RETURN_UNEXPECTED_IF_NULL before it is dereferenced.
// modified: flag pointer forwarded as-is to p->VisitAfter(); also checked with RETURN_UNEXPECTED_IF_NULL.
// Returns whatever Status p->VisitAfter() produces.
// NOTE(review): RETURN_UNEXPECTED_IF_NULL is defined outside this view; presumably it returns an
// error Status early when its argument is null - confirm against the macro definition.
Status TFRecordNode::AcceptAfter(IRNodePass *const p, bool *const modified) { Status TFRecordNode::AcceptAfter(IRNodePass *const p, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(p);
RETURN_UNEXPECTED_IF_NULL(modified);
// Downcast shared pointer then call visitor // Downcast shared pointer then call visitor
return p->VisitAfter(shared_from_base<TFRecordNode>(), modified); return p->VisitAfter(shared_from_base<TFRecordNode>(), modified);
} }

View File

@ -43,6 +43,7 @@ class JaggedConnector : public Connector<TensorRow> {
} }
Status Pop(int32_t worker_id, TensorRow *result) noexcept override { Status Pop(int32_t worker_id, TensorRow *result) noexcept override {
RETURN_UNEXPECTED_IF_NULL(result);
{ {
MS_ASSERT(worker_id < num_consumers_); MS_ASSERT(worker_id < num_consumers_);
std::unique_lock<std::mutex> lock(m_); std::unique_lock<std::mutex> lock(m_);
@ -53,7 +54,7 @@ class JaggedConnector : public Connector<TensorRow> {
} }
RETURN_IF_NOT_OK(queues_[pop_from_]->PopFront(result)); RETURN_IF_NOT_OK(queues_[pop_from_]->PopFront(result));
if (result->eoe()) { if (result != nullptr && result->eoe()) {
is_queue_finished_[pop_from_] = true; is_queue_finished_[pop_from_] = true;
} }

View File

@ -32,12 +32,14 @@ namespace mindspore {
namespace dataset { namespace dataset {
Status TensorOpFusionPass::Visit(std::shared_ptr<MapNode> node, bool *const modified) { Status TensorOpFusionPass::Visit(std::shared_ptr<MapNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
std::vector<std::shared_ptr<TensorOperation>> ops = node->operations(); std::vector<std::shared_ptr<TensorOperation>> ops = node->operations();
// start temporary code, to deal with pre-built TensorOperation // start temporary code, to deal with pre-built TensorOperation
std::vector<std::string> pattern = {kDecodeOp, kRandomCropAndResizeOp}; std::vector<std::string> pattern = {kDecodeOp, kRandomCropAndResizeOp};
auto itr = std::search(ops.begin(), ops.end(), pattern.begin(), pattern.end(), auto itr = std::search(ops.begin(), ops.end(), pattern.begin(), pattern.end(),
[](auto op, const std::string &nm) { return op->Name() == nm; }); [](auto op, const std::string &nm) { return op != nullptr ? op->Name() == nm : false; });
if (itr != ops.end()) { if (itr != ops.end()) {
MS_LOG(WARNING) << "Fusing pre-build Decode and RandomCropResize into one pre-build."; MS_LOG(WARNING) << "Fusing pre-build Decode and RandomCropResize into one pre-build.";
auto fused_op = dynamic_cast<RandomCropAndResizeOp *>((*(itr + 1))->Build().get()); auto fused_op = dynamic_cast<RandomCropAndResizeOp *>((*(itr + 1))->Build().get());
@ -52,7 +54,7 @@ Status TensorOpFusionPass::Visit(std::shared_ptr<MapNode> node, bool *const modi
// logic below is for non-prebuilt TensorOperation // logic below is for non-prebuilt TensorOperation
pattern = {vision::kDecodeOperation, vision::kRandomResizedCropOperation}; pattern = {vision::kDecodeOperation, vision::kRandomResizedCropOperation};
itr = std::search(ops.begin(), ops.end(), pattern.begin(), pattern.end(), itr = std::search(ops.begin(), ops.end(), pattern.begin(), pattern.end(),
[](auto op, const std::string &nm) { return op->Name() == nm; }); [](auto op, const std::string &nm) { return op != nullptr ? op->Name() == nm : false; });
// return here if no pattern is found // return here if no pattern is found
RETURN_OK_IF_TRUE(itr == ops.end()); RETURN_OK_IF_TRUE(itr == ops.end());

View File

@ -27,6 +27,8 @@ namespace dataset {
// this will become the RootNode:DatasetNode when it is turned on // this will become the RootNode:DatasetNode when it is turned on
Status AutoWorkerPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *const modified) { Status AutoWorkerPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(root_ir);
RETURN_UNEXPECTED_IF_NULL(modified);
uint8_t config = GlobalContext::config_manager()->get_auto_worker_config(); uint8_t config = GlobalContext::config_manager()->get_auto_worker_config();
OpWeightPass pass(kOpWeightConfigs[config < kOpWeightConfigs.size() ? config : 0]); OpWeightPass pass(kOpWeightConfigs[config < kOpWeightConfigs.size() ? config : 0]);
@ -46,6 +48,8 @@ Status AutoWorkerPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *con
// get the maximum weight of all the ops, this value is used to ensure the ratio of num_workers between ops // get the maximum weight of all the ops, this value is used to ensure the ratio of num_workers between ops
float max_weight = 0; float max_weight = 0;
for (const auto &p : pass.weight_profile_) max_weight = std::max(max_weight, p.second); for (const auto &p : pass.weight_profile_) max_weight = std::max(max_weight, p.second);
CHECK_FAIL_RETURN_UNEXPECTED(max_weight != 0, "Internal error, doesn't allow divide zero.");
RETURN_IF_NOT_OK(pass.Run(root_ir, modified)); RETURN_IF_NOT_OK(pass.Run(root_ir, modified));
constexpr size_t max_num_ops = 3; constexpr size_t max_num_ops = 3;
if (pass.parallel_ops_.size() > max_num_ops) { if (pass.parallel_ops_.size() > max_num_ops) {
@ -53,6 +57,7 @@ Status AutoWorkerPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *con
<< "1 batch and 1 map. AutoNumWorker may not be optimal for usage on complex pipelines."; << "1 batch and 1 map. AutoNumWorker may not be optimal for usage on complex pipelines.";
} }
CHECK_FAIL_RETURN_UNEXPECTED(pass.weight_sum_ != 0, "Internal error, doesn't allow divide zero.");
for (auto &p : pass.parallel_ops_) { for (auto &p : pass.parallel_ops_) {
// get the num worker via the weight ratio // get the num worker via the weight ratio
int32_t num_workers = std::ceil((thread_cnt_ * p.second) / (pass.weight_sum_ * num_shards)); int32_t num_workers = std::ceil((thread_cnt_ * p.second) / (pass.weight_sum_ * num_shards));

View File

@ -33,6 +33,8 @@ RepeatPass::RepeatPass()
// Identifies the subtree below this node as being in a repeated path of the tree. // Identifies the subtree below this node as being in a repeated path of the tree.
Status RepeatPass::Visit(std::shared_ptr<RepeatNode> node, bool *const modified) { Status RepeatPass::Visit(std::shared_ptr<RepeatNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
// If this is an infinite repeat under infinite repeat/epoch, adjust current num_repeats_. // If this is an infinite repeat under infinite repeat/epoch, adjust current num_repeats_.
// Otherwise, after multiplication it would become positive and this repeat wouldn't run infinitely. // Otherwise, after multiplication it would become positive and this repeat wouldn't run infinitely.
if (node->Count() == DatasetOp::kInfiniteRepeat && num_repeats_ < 0) { if (node->Count() == DatasetOp::kInfiniteRepeat && num_repeats_ < 0) {
@ -56,6 +58,8 @@ Status RepeatPass::Visit(std::shared_ptr<RepeatNode> node, bool *const modified)
// Identifies the subtree below this node as being in a repeated path of the tree. // Identifies the subtree below this node as being in a repeated path of the tree.
Status RepeatPass::Visit(std::shared_ptr<EpochCtrlNode> node, bool *const modified) { Status RepeatPass::Visit(std::shared_ptr<EpochCtrlNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
// Get the total number of epochs from the EpochCtrlOp parameter // Get the total number of epochs from the EpochCtrlOp parameter
num_epochs_ = node->Count(); num_epochs_ = node->Count();
// Every node below this EpochCtrlOp should be repeated for num_epochs_ times. // Every node below this EpochCtrlOp should be repeated for num_epochs_ times.
@ -69,6 +73,8 @@ Status RepeatPass::Visit(std::shared_ptr<EpochCtrlNode> node, bool *const modifi
#ifndef ENABLE_ANDROID #ifndef ENABLE_ANDROID
// Identifies the subtree below this node as being in a cache merge path // Identifies the subtree below this node as being in a cache merge path
Status RepeatPass::Visit(std::shared_ptr<CacheMergeNode> node, bool *const modified) { Status RepeatPass::Visit(std::shared_ptr<CacheMergeNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
// Turn on the flag that we're under a merge op // Turn on the flag that we're under a merge op
is_merge_ = true; is_merge_ = true;
return Status::OK(); return Status::OK();
@ -76,6 +82,8 @@ Status RepeatPass::Visit(std::shared_ptr<CacheMergeNode> node, bool *const modif
// Identifies the subtree below this node as being cached // Identifies the subtree below this node as being cached
Status RepeatPass::Visit(std::shared_ptr<CacheNode> node, bool *const modified) { Status RepeatPass::Visit(std::shared_ptr<CacheNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
// Turn on the flag that we're under a merge op // Turn on the flag that we're under a merge op
is_cached_ = true; is_cached_ = true;
return Status::OK(); return Status::OK();
@ -84,6 +92,8 @@ Status RepeatPass::Visit(std::shared_ptr<CacheNode> node, bool *const modified)
// Hooks up any identified eoe nodes under this repeat. // Hooks up any identified eoe nodes under this repeat.
Status RepeatPass::VisitAfter(std::shared_ptr<RepeatNode> node, bool *const modified) { Status RepeatPass::VisitAfter(std::shared_ptr<RepeatNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
// We are a repeat op in the descendant tree of a merge op, then we take the saved lookup op // We are a repeat op in the descendant tree of a merge op, then we take the saved lookup op
// and set its total repeats. It is important that the op is removed from the save area, // and set its total repeats. It is important that the op is removed from the save area,
// because the merge op above us may also take action on it later for a different case when // because the merge op above us may also take action on it later for a different case when
@ -103,12 +113,16 @@ Status RepeatPass::VisitAfter(std::shared_ptr<RepeatNode> node, bool *const modi
// The total repeats of nodes above this Repeat(n) have nothing to do with this RepeatOp's parameter n. // The total repeats of nodes above this Repeat(n) have nothing to do with this RepeatOp's parameter n.
// But num_repeats_ has been multiplied by n during this Repeat(n)'s PreRunOnNode, // But num_repeats_ has been multiplied by n during this Repeat(n)'s PreRunOnNode,
// so we divide num_repeats_ by n to be able to correctly set total repeats for nodes above this RepeatOp. // so we divide num_repeats_ by n to be able to correctly set total repeats for nodes above this RepeatOp.
CHECK_FAIL_RETURN_UNEXPECTED(node->Count() != 0, "Invalid data, the number of node can't be 0.");
num_repeats_ /= node->Count(); num_repeats_ /= node->Count();
return Status::OK(); return Status::OK();
} }
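// Sets the total repeats and the number of epochs on this EpochCtrl node once its descendants have been walked. // Sets the total repeats and the number of epochs on this EpochCtrl node once its descendants have been walked.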
Status RepeatPass::VisitAfter(std::shared_ptr<EpochCtrlNode> node, bool *const modified) { Status RepeatPass::VisitAfter(std::shared_ptr<EpochCtrlNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
CHECK_FAIL_RETURN_UNEXPECTED(node->Count() != 0, "Invalid data, the number of node can't be 0.");
node->SetTotalRepeats(num_repeats_); node->SetTotalRepeats(num_repeats_);
node->SetNumEpochs(num_epochs_); node->SetNumEpochs(num_epochs_);
// We finish the walk of this EpochCtrl's descendent nodes. // We finish the walk of this EpochCtrl's descendent nodes.
@ -119,6 +133,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<EpochCtrlNode> node, bool *const m
// All operators have a flag that might be set related to the repeat and any leaf nodes need to be set up // All operators have a flag that might be set related to the repeat and any leaf nodes need to be set up
// for use with a controlling repeat above it. // for use with a controlling repeat above it.
Status RepeatPass::VisitAfter(std::shared_ptr<DatasetNode> node, bool *const modified) { Status RepeatPass::VisitAfter(std::shared_ptr<DatasetNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
// If we are under a cache op, then save ourselves to the cached op stack. // If we are under a cache op, then save ourselves to the cached op stack.
if (is_cached_) { if (is_cached_) {
AddToCachedNodeStack(node); AddToCachedNodeStack(node);
@ -132,6 +148,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<DatasetNode> node, bool *const mod
#ifndef ENABLE_ANDROID #ifndef ENABLE_ANDROID
// CacheOp removes previous leaf ops and replaces them with itself // CacheOp removes previous leaf ops and replaces them with itself
Status RepeatPass::VisitAfter(std::shared_ptr<CacheNode> node, bool *const modified) { Status RepeatPass::VisitAfter(std::shared_ptr<CacheNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
is_cached_ = false; is_cached_ = false;
// if we are a cache within a repeat path of the tree, then adjust the total repeats and total epochs for cached ops. // if we are a cache within a repeat path of the tree, then adjust the total repeats and total epochs for cached ops.
@ -153,6 +171,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<CacheNode> node, bool *const modif
// Turns off the tracking for operations under merge op // Turns off the tracking for operations under merge op
Status RepeatPass::VisitAfter(std::shared_ptr<CacheMergeNode> node, bool *const modified) { Status RepeatPass::VisitAfter(std::shared_ptr<CacheMergeNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
// If there was not any repeat in the merge cache miss leg, then the cache_lookup // If there was not any repeat in the merge cache miss leg, then the cache_lookup
// would not have been consumed yet. In that case, we need to set its total repeats for it. // would not have been consumed yet. In that case, we need to set its total repeats for it.
if (cache_lookup_) { if (cache_lookup_) {
@ -168,6 +188,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<CacheMergeNode> node, bool *const
// Saves the lookup op in case it needs to be referenced by a repeat // Saves the lookup op in case it needs to be referenced by a repeat
Status RepeatPass::VisitAfter(std::shared_ptr<CacheLookupNode> node, bool *const modified) { Status RepeatPass::VisitAfter(std::shared_ptr<CacheLookupNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
if (!node->IsLeaf()) { if (!node->IsLeaf()) {
// By definition, the CacheLookup must be a leaf op. Make that clear here. // By definition, the CacheLookup must be a leaf op. Make that clear here.
RETURN_STATUS_UNEXPECTED("CacheLookupOp must be a leaf node!"); RETURN_STATUS_UNEXPECTED("CacheLookupOp must be a leaf node!");
@ -185,6 +207,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<CacheLookupNode> node, bool *const
#endif #endif
Status RepeatPass::VisitAfter(std::shared_ptr<TransferNode> node, bool *const modified) { Status RepeatPass::VisitAfter(std::shared_ptr<TransferNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
// Set total repeats and total epochs for the TransferNode // Set total repeats and total epochs for the TransferNode
node->SetTotalRepeats(num_epochs_); node->SetTotalRepeats(num_epochs_);
node->SetNumEpochs(num_epochs_); node->SetNumEpochs(num_epochs_);
@ -192,7 +216,12 @@ Status RepeatPass::VisitAfter(std::shared_ptr<TransferNode> node, bool *const mo
} }
// Adds an operator to the cached operator stack save area // Adds an operator to the cached operator stack save area
void RepeatPass::AddToCachedNodeStack(const std::shared_ptr<DatasetNode> &node) {
  // A null node is silently ignored so that the save area never holds an empty pointer.
  if (node == nullptr) {
    return;
  }
  // Remember the node on the cached-operator stack.
  cached_node_stacks_.push(node);
}
// Pops an operator from the cached operator stack save area // Pops an operator from the cached operator stack save area
std::shared_ptr<DatasetNode> RepeatPass::PopFromCachedNodeStack() { std::shared_ptr<DatasetNode> RepeatPass::PopFromCachedNodeStack() {

View File

@ -29,6 +29,10 @@ EpochCtrlPass::InjectionFinder::InjectionFinder(std::shared_ptr<DatasetNode> nod
// Records the child of the root node as the injection point and reads the number of epochs from the root node // Records the child of the root node as the injection point and reads the number of epochs from the root node
Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<RootNode> node, bool *const modified) { Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<RootNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
CHECK_FAIL_RETURN_UNEXPECTED(node->Children().size() > 0,
"Invalid data, the node of child should greater than zero.");
// The injection is at the child of the root node // The injection is at the child of the root node
injection_point_ = node->Children()[0]; injection_point_ = node->Children()[0];
num_epochs_ = node->num_epochs(); num_epochs_ = node->num_epochs();
@ -37,6 +41,8 @@ Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<RootNode> node, boo
// Performs finder work for BuildVocabOp that has special rules about epoch control injection // Performs finder work for BuildVocabOp that has special rules about epoch control injection
Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<BuildVocabNode> node, bool *const modified) {
  // Both arguments must be valid; a null pointer is reported as an unexpected error.
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // Clear any injection point recorded so far.
  // NOTE(review): presumably a null injection point means no epoch control is injected
  // for a pipeline containing this node - confirm against the pass driver.
  injection_point_ = nullptr;
  return Status::OK();
}
@ -44,12 +50,18 @@ Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<BuildVocabNode> nod
#ifndef ENABLE_ANDROID #ifndef ENABLE_ANDROID
// Performs finder work for BuildSentencePieceVocabNode that has special rules about epoch control injection // Performs finder work for BuildSentencePieceVocabNode that has special rules about epoch control injection
Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<BuildSentenceVocabNode> node, bool *const modified) {
  // Both arguments must be valid; a null pointer is reported as an unexpected error.
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // Clear any injection point recorded so far.
  // NOTE(review): presumably a null injection point means no epoch control is injected
  // for a pipeline containing this node - confirm against the pass driver.
  injection_point_ = nullptr;
  return Status::OK();
}
#endif #endif
Status EpochCtrlPass::InjectionFinder::VisitAfter(std::shared_ptr<TransferNode> node, bool *const modified) { Status EpochCtrlPass::InjectionFinder::VisitAfter(std::shared_ptr<TransferNode> node, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(modified);
CHECK_FAIL_RETURN_UNEXPECTED(node->Children().size() > 0,
"Invalid data, the node of child should greater than zero.");
// Assumption: There is only one TransferNode in a pipeline. This assumption is not validated here. // Assumption: There is only one TransferNode in a pipeline. This assumption is not validated here.
// Move the injection point to the child of this node. // Move the injection point to the child of this node.
injection_point_ = node->Children()[0]; injection_point_ = node->Children()[0];
@ -61,6 +73,8 @@ EpochCtrlPass::EpochCtrlPass() {}
// Runs an injection pass to inject in operators needed at the pre pass stage // Runs an injection pass to inject in operators needed at the pre pass stage
Status EpochCtrlPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *const modified) { Status EpochCtrlPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *const modified) {
RETURN_UNEXPECTED_IF_NULL(root_ir);
RETURN_UNEXPECTED_IF_NULL(modified);
MS_LOG(INFO) << "Pre pass: Injection pass started."; MS_LOG(INFO) << "Pre pass: Injection pass started.";
// First, run the finder to perform any injection info before we can go ahead to drive the op injection work. // First, run the finder to perform any injection info before we can go ahead to drive the op injection work.

View File

@ -53,8 +53,8 @@ json ConnectorSize::ParseOpInfo(const DatasetOp &node, const std::vector<int32_t
auto children = node.Children(); auto children = node.Children();
std::vector<int32_t> children_id; std::vector<int32_t> children_id;
std::transform(children.begin(), children.end(), std::back_inserter(children_id), (void)std::transform(children.begin(), children.end(), std::back_inserter(children_id),
[](std::shared_ptr<DatasetOp> op) -> int32_t { return op->id(); }); [](const std::shared_ptr<DatasetOp> &op) -> int32_t { return op->id(); });
if (!children_id.empty()) { if (!children_id.empty()) {
json_node["children"] = children_id; json_node["children"] = children_id;
} }

View File

@ -29,6 +29,9 @@ namespace dataset {
// temporary helper // temporary helper
int ConnectorThroughput::InitNodes() { int ConnectorThroughput::InitNodes() {
if (tree_ == nullptr) {
return 0;
}
auto it = (*tree_).begin(); auto it = (*tree_).begin();
return it.NumNodes(); return it.NumNodes();
} }
@ -43,15 +46,16 @@ Status ConnectorThroughput::Sample() {
out_row_count_row[col] = cur_out_rows_count; out_row_count_row[col] = cur_out_rows_count;
auto sz = timestamps_.size(); auto sz = timestamps_.size();
cur_time = std::chrono::steady_clock::now(); cur_time = std::chrono::steady_clock::now();
double dt = 0; double data_time = 0;
if (sz > 1) { if (sz > 1) {
auto _dt = std::chrono::duration_cast<std::chrono::microseconds>(timestamps_[0][sz - 1] - timestamps_[0][sz - 2]); auto full_time =
dt = std::chrono::duration<double>(_dt).count(); std::chrono::duration_cast<std::chrono::microseconds>(timestamps_[0][sz - 1] - timestamps_[0][sz - 2]);
data_time = std::chrono::duration<double>(full_time).count();
} }
auto prev_out_rows_count = out_row_count_table_[col][out_row_count_table_.size() - 1]; auto prev_out_rows_count = out_row_count_table_[col][out_row_count_table_.size() - 1];
if (dt != 0) { if (data_time != 0) {
const int32_t multiplier = 1000; const int32_t multiplier = 1000;
auto thr = (cur_out_rows_count - prev_out_rows_count) / (multiplier * dt); auto thr = (cur_out_rows_count - prev_out_rows_count) / (multiplier * data_time);
throughput_row[col] = thr; throughput_row[col] = thr;
} else { } else {
throughput_row[col] = 0; throughput_row[col] = 0;
@ -70,7 +74,7 @@ json ConnectorThroughput::ParseOpInfo(const DatasetOp &node, const std::vector<d
auto children = node.Children(); auto children = node.Children();
std::vector<int32_t> children_id; std::vector<int32_t> children_id;
std::transform(children.begin(), children.end(), std::back_inserter(children_id), std::transform(children.begin(), children.end(), std::back_inserter(children_id),
[](std::shared_ptr<DatasetOp> op) -> int32_t { return op->id(); }); [](const std::shared_ptr<DatasetOp> &op) -> int32_t { return op ? op->id() : 0; });
json json_node; json json_node;
json_node["op_id"] = node.id(); json_node["op_id"] = node.id();
json_node["op_type"] = node.Name(); json_node["op_type"] = node.Name();
@ -100,8 +104,10 @@ Status ConnectorThroughput::SaveToFile() {
int col = 0; int col = 0;
for (auto &node : *tree_) { for (auto &node : *tree_) {
std::vector<double> throughput; std::vector<double> throughput;
for (auto i = 0; i < throughput_.size(); i++) { if (throughput_.size() > col) {
throughput.push_back(throughput_[col][i]); for (auto i = 0; i < throughput_[col].size(); i++) {
throughput.push_back(throughput_[col][i]);
}
} }
if (!path.Exists()) { if (!path.Exists()) {

View File

@ -18,9 +18,9 @@
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__)
#include <sys/syscall.h> #include <sys/syscall.h>
#endif #endif
#include <algorithm>
#include <cmath> #include <cmath>
#include <cstdio> #include <cstdio>
#include <algorithm>
#include <fstream> #include <fstream>
#include <memory> #include <memory>
#include <string> #include <string>
@ -33,8 +33,8 @@
using json = nlohmann::json; using json = nlohmann::json;
namespace mindspore { namespace mindspore {
namespace dataset { namespace dataset {
bool BaseCpu::fetched_all_process_shared = false; bool BaseCpu::fetched_all_process_shared_ = false;
std::unordered_map<int32_t, std::vector<pid_t>> BaseCpu::op_process_shared = {}; std::unordered_map<int32_t, std::vector<pid_t>> BaseCpu::op_process_shared_ = {};
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__)
#define USING_LINUX #define USING_LINUX
@ -46,8 +46,8 @@ BaseCpu::BaseCpu() {
pre_cpu_stat_.io_stat_ = 0; pre_cpu_stat_.io_stat_ = 0;
pre_cpu_stat_.idle_stat_ = 0; pre_cpu_stat_.idle_stat_ = 0;
pre_cpu_stat_.total_stat_ = 0; pre_cpu_stat_.total_stat_ = 0;
fetched_all_process = false; fetched_all_process_ = false;
pre_fetched_state = false; pre_fetched_state_ = false;
cpu_processor_num_ = 0; cpu_processor_num_ = 0;
} }
@ -157,6 +157,7 @@ Status DeviceCpu::Collect(const ExecutionTree *tree) {
return Status::OK(); return Status::OK();
} }
Status DeviceCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { Status DeviceCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) {
RETURN_UNEXPECTED_IF_NULL(name);
name->clear(); name->clear();
name->append("device_info"); name->append("device_info");
int total_samples = cpu_util_.size(); int total_samples = cpu_util_.size();
@ -221,6 +222,7 @@ Status DeviceCpu::SaveToFile(const std::string &file_path) {
Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id, Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id,
std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> *op_stat) { std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> *op_stat) {
RETURN_UNEXPECTED_IF_NULL(op_stat);
pid_t pid = 0; pid_t pid = 0;
#if defined(USING_LINUX) #if defined(USING_LINUX)
pid = syscall(SYS_getpid); pid = syscall(SYS_getpid);
@ -257,11 +259,12 @@ Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id,
} }
Status OperatorCpu::Collect(const ExecutionTree *tree) { Status OperatorCpu::Collect(const ExecutionTree *tree) {
RETURN_UNEXPECTED_IF_NULL(tree);
if (first_collect_) { if (first_collect_) {
for (auto iter = tree->begin(); iter != tree->end(); ++iter) { for (auto iter = tree->begin(); iter != tree->end(); ++iter) {
id_count_++; id_count_++;
op_name[iter->id()] = iter->NameWithID(); op_name_[iter->id()] = iter->NameWithID();
op_parallel_workers[iter->id()] = iter->num_workers(); op_parallel_workers_[iter->id()] = iter->num_workers();
} }
#if defined(USING_LINUX) #if defined(USING_LINUX)
cpu_processor_num_ = get_nprocs_conf(); cpu_processor_num_ = get_nprocs_conf();
@ -269,34 +272,34 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) {
} }
// Obtain the op and thread mapping // Obtain the op and thread mapping
op_thread.clear(); op_thread_.clear();
List<Task> allTasks = tree->AllTasks()->GetTask(); List<Task> allTasks = tree->AllTasks()->GetTask();
for (auto &task1 : allTasks) { for (auto &task1 : allTasks) {
int32_t op_id = task1.get_operator_id(); int32_t op_id = task1.get_operator_id();
op_thread[op_id].emplace_back(task1.get_linux_id()); op_thread_[op_id].emplace_back(task1.get_linux_id());
} }
// add process id into op_thread // add process id into op_thread
if (!fetched_all_process) { if (!fetched_all_process_) {
{ {
py::gil_scoped_acquire gil_acquire; py::gil_scoped_acquire gil_acquire;
py::module ds = py::module::import("mindspore.dataset.engine.datasets"); py::module ds = py::module::import("mindspore.dataset.engine.datasets");
py::tuple process_info = ds.attr("_get_operator_process")(); py::tuple process_info = ds.attr("_get_operator_process")();
py::dict sub_process = py::reinterpret_borrow<py::dict>(process_info[0]); py::dict sub_process = py::reinterpret_borrow<py::dict>(process_info[0]);
fetched_all_process = py::reinterpret_borrow<py::bool_>(process_info[1]); fetched_all_process_ = py::reinterpret_borrow<py::bool_>(process_info[1]);
// parse dict value // parse dict value
op_process = toIntMap(sub_process); op_process_ = toIntMap(sub_process);
BaseCpu::op_process_shared = op_process; BaseCpu::op_process_shared_ = op_process_;
BaseCpu::fetched_all_process_shared = fetched_all_process; BaseCpu::fetched_all_process_shared_ = fetched_all_process_;
} }
// judge whether there is device_que operator, if so operator id may need increase by one, temp use directly // judge whether there is device_que operator, if so operator id may need increase by one, temp use directly
for (auto item : op_process) { for (auto item : op_process_) {
if (!item.second.empty()) { if (!item.second.empty()) {
if (op_thread.find(item.first) != op_thread.end()) { if (op_thread_.find(item.first) != op_thread_.end()) {
op_thread[item.first].insert(op_thread[item.first].end(), item.second.begin(), item.second.end()); op_thread_[item.first].insert(op_thread_[item.first].end(), item.second.begin(), item.second.end());
} else { } else {
op_thread[item.first] = item.second; op_thread_[item.first] = item.second;
} }
} }
} }
@ -310,16 +313,15 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) {
if (!first_collect_) { if (!first_collect_) {
// obtain all the op id in current tasks // obtain all the op id in current tasks
std::vector<int32_t> total_op_id; std::vector<int32_t> total_op_id;
for (auto iter = op_thread.begin(); iter != op_thread.end(); iter++) { (void)std::transform(op_thread_.begin(), op_thread_.end(), std::back_inserter(total_op_id),
total_op_id.emplace_back(iter->first); [](const auto &iter) { return iter.first; });
}
// iter all the op, and obtain the CPU utilization of each operator // iter all the op, and obtain the CPU utilization of each operator
for (auto op_id = -1; op_id < id_count_; op_id++) { for (auto op_id = -1; op_id < id_count_; op_id++) {
float user_util = 0, sys_util = 0; float user_util = 0, sys_util = 0;
auto iter = std::find(total_op_id.begin(), total_op_id.end(), op_id); auto iter = std::find(total_op_id.begin(), total_op_id.end(), op_id);
if (iter != total_op_id.end()) { if (iter != total_op_id.end()) {
for (auto thread_id : op_thread[op_id]) { for (auto thread_id : op_thread_[op_id]) {
if (ParseCpuInfo(op_id, thread_id, &op_stat_) == Status::OK()) { if (ParseCpuInfo(op_id, thread_id, &op_stat_) == Status::OK()) {
user_util += (op_stat_[op_id][thread_id].user_stat_ - pre_op_stat_[op_id][thread_id].user_stat_) * 1.0 / user_util += (op_stat_[op_id][thread_id].user_stat_ - pre_op_stat_[op_id][thread_id].user_stat_) * 1.0 /
(total_stat_ - pre_total_stat_) * 100; (total_stat_ - pre_total_stat_) * 100;
@ -329,7 +331,7 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) {
} }
} }
CpuOpUtil info; CpuOpUtil info;
info.op_id = op_id; info.op_id_ = op_id;
info.sys_utilization_ = sys_util; info.sys_utilization_ = sys_util;
info.user_utilization_ = user_util; info.user_utilization_ = user_util;
cpu_step_util_.emplace_back(info); cpu_step_util_.emplace_back(info);
@ -337,10 +339,10 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) {
cpu_op_util_.emplace_back(cpu_step_util_); cpu_op_util_.emplace_back(cpu_step_util_);
} else { } else {
// mainly obtain the init CPU execute time in first collect // mainly obtain the init CPU execute time in first collect
for (auto iter = op_thread.begin(); iter != op_thread.end(); iter++) { for (const auto &iter : op_thread_) {
int32_t op_id = iter->first; int32_t op_id = iter.first;
for (auto thread_id_ : iter->second) { for (auto thread_id_ : iter.second) {
// ignore errors in the first collect // ParseCpuInfo may fail when the CPU data is not ready yet; keep going and collect the next thread's info
(void)ParseCpuInfo(op_id, thread_id_, &op_stat_); (void)ParseCpuInfo(op_id, thread_id_, &op_stat_);
} }
} }
@ -355,6 +357,8 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) {
} }
Status OperatorCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { Status OperatorCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) {
RETURN_UNEXPECTED_IF_NULL(name);
RETURN_UNEXPECTED_IF_NULL(extra_message);
int total_samples = cpu_op_util_.size(); int total_samples = cpu_op_util_.size();
// Only analyze the middle half of the samples // Only analyze the middle half of the samples
@ -374,15 +378,15 @@ Status OperatorCpu::Analyze(std::string *name, double *utilization, std::string
sum += cpu_op_util_[i][index].sys_utilization_; sum += cpu_op_util_[i][index].sys_utilization_;
} }
if ((end_analyze - start_analyze) > 0) { if ((end_analyze - start_analyze) > 0) {
op_util = 1.0 * sum * cpu_processor_num_ / (op_parallel_workers[op_id] * (end_analyze - start_analyze)); op_util = 1.0 * sum * cpu_processor_num_ / (op_parallel_workers_[op_id] * (end_analyze - start_analyze));
} }
if (op_util > *utilization) { if (op_util > *utilization) {
*utilization = op_util; *utilization = op_util;
name->clear(); name->clear();
name->append(op_name[op_id]); (void)name->append(op_name_[op_id]);
} }
extra_message->append(op_name[op_id] + " utiliization per thread: " + std::to_string(op_util) + "% (" + (void)extra_message->append(op_name_[op_id] + " utilization per thread: " + std::to_string(op_util) + "% (" +
std::to_string(op_parallel_workers[op_id]) + " parallel_workers); "); std::to_string(op_parallel_workers_[op_id]) + " parallel_workers); ");
} }
return Status::OK(); return Status::OK();
} }
@ -428,24 +432,24 @@ Status ProcessCpu::ParseCpuInfo() {
uint64_t total_stat_; uint64_t total_stat_;
RETURN_IF_NOT_OK(GetTotalCpuTime(&total_stat_)); RETURN_IF_NOT_OK(GetTotalCpuTime(&total_stat_));
if (!pre_fetched_state) { if (!pre_fetched_state_) {
process_id.clear(); process_id_.clear();
pid_t main_pid = 0; pid_t main_pid = 0;
#if defined(USING_LINUX) #if defined(USING_LINUX)
main_pid = syscall(SYS_getpid); main_pid = syscall(SYS_getpid);
#endif #endif
process_id.emplace_back(main_pid); process_id_.emplace_back(main_pid);
op_process = BaseCpu::op_process_shared; op_process_ = BaseCpu::op_process_shared_;
fetched_all_process = BaseCpu::fetched_all_process_shared; fetched_all_process_ = BaseCpu::fetched_all_process_shared_;
for (auto item : op_process) { for (const auto &item : op_process_) {
for (auto id : item.second) { for (const auto &id : item.second) {
process_id.emplace_back(id); process_id_.emplace_back(id);
} }
} }
} }
float user_util = 0, sys_util = 0; float user_util = 0, sys_util = 0;
for (auto pid : process_id) { for (const auto &pid : process_id_) {
std::string stat_path = "/proc/" + std::to_string(pid) + "/stat"; std::string stat_path = "/proc/" + std::to_string(pid) + "/stat";
std::ifstream file(stat_path); std::ifstream file(stat_path);
@ -479,11 +483,12 @@ Status ProcessCpu::ParseCpuInfo() {
} }
pre_total_stat_ = total_stat_; pre_total_stat_ = total_stat_;
first_collect_ = false; first_collect_ = false;
pre_fetched_state = fetched_all_process; pre_fetched_state_ = fetched_all_process_;
return Status::OK(); return Status::OK();
} }
Status ProcessCpu::Collect(const ExecutionTree *tree) { Status ProcessCpu::Collect(const ExecutionTree *tree) {
RETURN_UNEXPECTED_IF_NULL(tree);
if (first_collect_) { if (first_collect_) {
#if defined(USING_LINUX) #if defined(USING_LINUX)
cpu_processor_num_ = get_nprocs_conf(); cpu_processor_num_ = get_nprocs_conf();
@ -495,6 +500,9 @@ Status ProcessCpu::Collect(const ExecutionTree *tree) {
} }
Status ProcessCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { Status ProcessCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) {
RETURN_UNEXPECTED_IF_NULL(name);
RETURN_UNEXPECTED_IF_NULL(utilization);
RETURN_UNEXPECTED_IF_NULL(extra_message);
name->clear(); name->clear();
name->append("process_info"); name->append("process_info");
int total_samples = process_util_.size(); int total_samples = process_util_.size();

View File

@ -49,7 +49,7 @@ typedef struct CpuInfo_s {
// One utilization record for a single operator.
// Members are zero-initialized so a default-constructed record never exposes indeterminate values.
typedef struct CpuOpInfo_s {
  float user_utilization_ = 0;  // utilization derived from the operator's user_stat_ counters
  float sys_utilization_ = 0;   // system-side utilization of the operator
  int32_t op_id_ = 0;           // id of the operator this record belongs to
} CpuOpUtil;
// CPU utilization of process // CPU utilization of process
@ -78,11 +78,11 @@ class BaseCpu {
protected: protected:
std::vector<CpuUtil> cpu_util_; std::vector<CpuUtil> cpu_util_;
CpuStat pre_cpu_stat_; CpuStat pre_cpu_stat_;
static bool fetched_all_process_shared; static bool fetched_all_process_shared_;
static std::unordered_map<int32_t, std::vector<pid_t>> op_process_shared; static std::unordered_map<int32_t, std::vector<pid_t>> op_process_shared_;
bool fetched_all_process; bool fetched_all_process_;
bool pre_fetched_state; bool pre_fetched_state_;
std::unordered_map<int32_t, std::vector<pid_t>> op_process; std::unordered_map<int32_t, std::vector<pid_t>> op_process_;
int32_t cpu_processor_num_; int32_t cpu_processor_num_;
}; };
@ -136,9 +136,9 @@ class OperatorCpu : public BaseCpu {
bool first_collect_; bool first_collect_;
// Store the id and its corresponding threads. // Store the id and its corresponding threads.
std::unordered_map<int32_t, std::vector<pid_t>> op_thread; std::unordered_map<int32_t, std::vector<pid_t>> op_thread_;
std::unordered_map<int32_t, std::string> op_name; std::unordered_map<int32_t, std::string> op_name_;
std::unordered_map<int32_t, int32_t> op_parallel_workers; std::unordered_map<int32_t, int32_t> op_parallel_workers_;
std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> pre_op_stat_; std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> pre_op_stat_;
uint64_t pre_total_stat_; uint64_t pre_total_stat_;
int32_t id_count_; int32_t id_count_;
@ -161,7 +161,7 @@ class ProcessCpu : public BaseCpu {
std::vector<CpuProcessUtil> process_util_; std::vector<CpuProcessUtil> process_util_;
uint64_t pre_total_stat_; uint64_t pre_total_stat_;
std::unordered_map<int64_t, CpuOpStat> pre_process_stat_; std::unordered_map<int64_t, CpuOpStat> pre_process_stat_;
std::vector<pid_t> process_id; std::vector<pid_t> process_id_;
}; };
// Sampling CPU information // Sampling CPU information

View File

@ -52,7 +52,9 @@ class PerfData {
void AddSample(const T &row) { void AddSample(const T &row) {
auto i = 0; auto i = 0;
for (const auto &e : row) { for (const auto &e : row) {
data_[i++].push_back(e); if (data_.size() > i) {
data_[i++].push_back(e);
}
} }
counter_++; counter_++;
} }
@ -62,7 +64,9 @@ class PerfData {
// Builds the sample at position `idx` across all columns and returns it as a vector of n_cols_ entries.
// A cell that does not exist (missing column, or column shorter than idx) keeps the
// value-initialized entry the vector was constructed with.
auto Row(dsize_t idx) {
  std::vector<V> row(n_cols_);
  for (auto i = 0; i < n_cols_; i++) {
    // Guard both the column index and the position inside the column before reading.
    if (data_.size() > i && data_[i].size() > idx) {
      row[i] = data_[i][idx];
    }
  }
  return row;
}

View File

@ -51,6 +51,7 @@ Status Tracing::SaveToFile() {
} }
Status Sampling::ReadJson(nlohmann::json *output) { Status Sampling::ReadJson(nlohmann::json *output) {
RETURN_UNEXPECTED_IF_NULL(output);
Path path = Path(file_path_); Path path = Path(file_path_);
if (path.Exists()) { if (path.Exists()) {
MS_LOG(DEBUG) << file_path_ << " exists"; MS_LOG(DEBUG) << file_path_ << " exists";

View File

@ -25,6 +25,8 @@ std::map<std::string, Status (*)(nlohmann::json json_obj, std::shared_ptr<Tensor
Serdes::func_ptr_ = Serdes::InitializeFuncPtr(); Serdes::func_ptr_ = Serdes::InitializeFuncPtr();
Status Serdes::SaveToJSON(std::shared_ptr<DatasetNode> node, const std::string &filename, nlohmann::json *out_json) { Status Serdes::SaveToJSON(std::shared_ptr<DatasetNode> node, const std::string &filename, nlohmann::json *out_json) {
RETURN_UNEXPECTED_IF_NULL(node);
RETURN_UNEXPECTED_IF_NULL(out_json);
// Dump attributes of current node to json string // Dump attributes of current node to json string
nlohmann::json args; nlohmann::json args;
RETURN_IF_NOT_OK(node->to_json(&args)); RETURN_IF_NOT_OK(node->to_json(&args));

View File

@ -48,6 +48,7 @@ TreeAdapter::TreeAdapter(UsageFlag usage) : usage_(usage), launched_(false), tre
} }
Status TreeAdapter::PrePass(std::shared_ptr<DatasetNode> ir) { Status TreeAdapter::PrePass(std::shared_ptr<DatasetNode> ir) {
RETURN_UNEXPECTED_IF_NULL(ir);
// Vector of actions in pre-pass phase // Vector of actions in pre-pass phase
std::vector<std::unique_ptr<IRPass>> actions; std::vector<std::unique_ptr<IRPass>> actions;
@ -73,6 +74,7 @@ Status TreeAdapter::PrePass(std::shared_ptr<DatasetNode> ir) {
} }
Status TreeAdapter::Optimize(std::shared_ptr<DatasetNode> ir) { Status TreeAdapter::Optimize(std::shared_ptr<DatasetNode> ir) {
RETURN_UNEXPECTED_IF_NULL(ir);
// Vector of optimizations // Vector of optimizations
std::vector<std::unique_ptr<IRNodePass>> optimizations; std::vector<std::unique_ptr<IRNodePass>> optimizations;
MS_LOG(INFO) << "Running optimization pass loops"; MS_LOG(INFO) << "Running optimization pass loops";
@ -89,6 +91,7 @@ Status TreeAdapter::Optimize(std::shared_ptr<DatasetNode> ir) {
} }
Status TreeAdapter::PostPass(std::shared_ptr<DatasetNode> ir) { Status TreeAdapter::PostPass(std::shared_ptr<DatasetNode> ir) {
RETURN_UNEXPECTED_IF_NULL(ir);
// Vector of actions in post-pass phase // Vector of actions in post-pass phase
std::vector<std::unique_ptr<IRPass>> actions; std::vector<std::unique_ptr<IRPass>> actions;
MS_LOG(INFO) << "Running post pass loops."; MS_LOG(INFO) << "Running post pass loops.";
@ -118,6 +121,9 @@ Status TreeAdapter::PostPass(std::shared_ptr<DatasetNode> ir) {
} }
Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std::shared_ptr<DatasetOp> *const op) { Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std::shared_ptr<DatasetOp> *const op) {
RETURN_UNEXPECTED_IF_NULL(ir);
RETURN_UNEXPECTED_IF_NULL(op);
RETURN_UNEXPECTED_IF_NULL(tree_);
// Build the DatasetOp ExecutionTree from the optimized IR tree // Build the DatasetOp ExecutionTree from the optimized IR tree
std::vector<std::shared_ptr<DatasetOp>> ops; std::vector<std::shared_ptr<DatasetOp>> ops;
RETURN_IF_NOT_OK(ir->Build(&ops)); RETURN_IF_NOT_OK(ir->Build(&ops));
@ -133,7 +139,7 @@ Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std
} }
// Build the children of IR, once they return, add the return value to *op // Build the children of IR, once they return, add the return value to *op
for (std::shared_ptr<DatasetNode> child_ir : ir->Children()) { for (const std::shared_ptr<DatasetNode> &child_ir : ir->Children()) {
std::shared_ptr<DatasetOp> child_op; std::shared_ptr<DatasetOp> child_op;
RETURN_IF_NOT_OK(BuildExecutionTreeRecur(child_ir, &child_op)); RETURN_IF_NOT_OK(BuildExecutionTreeRecur(child_ir, &child_op));
RETURN_IF_NOT_OK(ops.back()->AddChild(child_op)); // append children to the last of ops RETURN_IF_NOT_OK(ops.back()->AddChild(child_op)); // append children to the last of ops
@ -143,6 +149,7 @@ Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std
} }
Status TreeAdapter::Build(std::shared_ptr<DatasetNode> root_ir) { Status TreeAdapter::Build(std::shared_ptr<DatasetNode> root_ir) {
RETURN_UNEXPECTED_IF_NULL(root_ir);
// This will evolve in the long run // This will evolve in the long run
tree_ = std::make_unique<ExecutionTree>(); tree_ = std::make_unique<ExecutionTree>();
// disable profiling if this is only a getter pass // disable profiling if this is only a getter pass

View File

@ -22,6 +22,8 @@ namespace dataset {
TreeAdapterLite::TreeAdapterLite() : root_(nullptr) { tree_ = std::make_unique<ExecutionTree>(); } TreeAdapterLite::TreeAdapterLite() : root_(nullptr) { tree_ = std::make_unique<ExecutionTree>(); }
Status TreeAdapterLite::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std::shared_ptr<DatasetOp> *const op) { Status TreeAdapterLite::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std::shared_ptr<DatasetOp> *const op) {
RETURN_UNEXPECTED_IF_NULL(ir);
RETURN_UNEXPECTED_IF_NULL(op);
// Build the DatasetOp ExecutionTree from the optimized IR tree // Build the DatasetOp ExecutionTree from the optimized IR tree
std::vector<std::shared_ptr<DatasetOp>> ops; std::vector<std::shared_ptr<DatasetOp>> ops;
RETURN_IF_NOT_OK(ir->Build(&ops)); RETURN_IF_NOT_OK(ir->Build(&ops));
@ -41,7 +43,7 @@ Status TreeAdapterLite::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir,
} }
// Build the children of IR, once they return, add the return value to *op // Build the children of IR, once they return, add the return value to *op
for (std::shared_ptr<DatasetNode> child_ir : ir->Children()) { for (const std::shared_ptr<DatasetNode> &child_ir : ir->Children()) {
std::shared_ptr<DatasetOp> child_op; std::shared_ptr<DatasetOp> child_op;
RETURN_IF_NOT_OK(BuildExecutionTreeRecur(child_ir, &child_op)); RETURN_IF_NOT_OK(BuildExecutionTreeRecur(child_ir, &child_op));
RETURN_IF_NOT_OK(ops.back()->AddChild(child_op)); // append children to the last of ops RETURN_IF_NOT_OK(ops.back()->AddChild(child_op)); // append children to the last of ops
@ -60,6 +62,7 @@ Status TreeAdapterLite::BuildTree(std::shared_ptr<DatasetNode> root_ir) {
// Pull the next row from the root of the execution tree.
// \param[out] row Destination for the fetched row; must not be null.
// \return Status Error when root_ or row is null, or when the root's GetNextRowPullMode fails.
Status TreeAdapterLite::GetNextRow(TensorRow *const row) {
  RETURN_UNEXPECTED_IF_NULL(root_);
  // Validate the output pointer before it is handed to the root; a check placed after the call
  // is too late if the callee dereferences it.
  RETURN_UNEXPECTED_IF_NULL(row);
  RETURN_IF_NOT_OK(root_->GetNextRowPullMode(row));
  return Status::OK();
}

View File

@ -19,6 +19,7 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include <utility>
#include <vector> #include <vector>
#include "minddata/dataset/core/device_tensor.h" #include "minddata/dataset/core/device_tensor.h"
#include "minddata/dataset/core/device_resource.h" #include "minddata/dataset/core/device_resource.h"
@ -30,7 +31,8 @@ namespace mindspore {
namespace dataset { namespace dataset {
class DvppNormalizeOp : public TensorOp { class DvppNormalizeOp : public TensorOp {
public: public:
/// \brief Constructor
/// \param[in] mean Values stored into mean_ (taken by value and moved in).
/// \param[in] std Values stored into std_ (taken by value and moved in).
/// \note The parameter named `std` does not hide the namespace in `std::move`: a name followed
///     by `::` is looked up among namespaces and types only.
explicit DvppNormalizeOp(std::vector<float> mean, std::vector<float> std)
    : mean_(std::move(mean)), std_(std::move(std)) {}
~DvppNormalizeOp() = default; ~DvppNormalizeOp() = default;

View File

@ -18,7 +18,7 @@
#ifndef ENABLE_DVPP_INTERFACE #ifndef ENABLE_DVPP_INTERFACE
#define ENABLE_DVPP_INTERFACE #define ENABLE_DVPP_INTERFACE
#endif #endif
#include <stdio.h> #include <cstdio>
#include <iostream> #include <iostream>
#include <memory> #include <memory>
#include <vector> #include <vector>

View File

@ -13,13 +13,14 @@
* limitations under the License. * limitations under the License.
*/ */
#include "minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h"
#include <thread>
#include <sys/stat.h>
#include <sys/time.h>
#include "minddata/dataset/include/dataset/constants.h" #include "minddata/dataset/include/dataset/constants.h"
#include "minddata/dataset/core/tensor_shape.h" #include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/kernels/image/image_utils.h" #include "minddata/dataset/kernels/image/image_utils.h"
#include "MDAclProcess.h"
#include <sys/time.h>
#include <thread>
#include <sys/stat.h>
namespace { namespace {
const int BUFFER_SIZE = 2048; const int BUFFER_SIZE = 2048;

View File

@ -17,25 +17,25 @@
#define MDACLMANAGER_H #define MDACLMANAGER_H
#include <climits> #include <climits>
#include <string> #include <cstdio>
#include <string.h>
#include <map> #include <map>
#include <iostream> #include <iostream>
#include <memory> #include <memory>
#include <unistd.h>
#include <string>
#include <sys/stat.h>
#include <sys/types.h>
#include "acl/acl.h" #include "acl/acl.h"
#include "CommonDataType.h"
#include "minddata/dataset/core/tensor_shape.h" #include "minddata/dataset/core/tensor_shape.h"
#include "minddata/dataset/core/data_type.h" #include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h"
#include "minddata/dataset/kernels/image/dvpp/utils/DvppCommon.h"
#include "minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h"
#include "mindspore/ccsrc/minddata/dataset/core/device_tensor.h" #include "mindspore/ccsrc/minddata/dataset/core/device_tensor.h"
#include "mindspore/ccsrc/minddata/dataset/core/tensor.h" #include "mindspore/ccsrc/minddata/dataset/core/tensor.h"
#include "mindspore/core/utils/log_adapter.h" #include "mindspore/core/utils/log_adapter.h"
#include "mindspore/ccsrc/minddata/dataset/util/status.h" #include "mindspore/ccsrc/minddata/dataset/util/status.h"
#include "ErrorCode.h"
#include "DvppCommon.h"
#include <stdio.h>
#include <unistd.h>
#include <sys/stat.h>
#include <sys/types.h>
mode_t SetFileDefaultUmask(); mode_t SetFileDefaultUmask();

View File

@ -16,17 +16,18 @@
#ifndef RESOURCEMANAGER_H #ifndef RESOURCEMANAGER_H
#define RESOURCEMANAGER_H #define RESOURCEMANAGER_H
#include <vector> #include <climits>
#include <set>
#include <cstring> #include <cstring>
#include <climits> #include <climits>
#include <unordered_map>
#include <mutex> #include <mutex>
#include "CommonDataType.h" #include <set>
#include "ErrorCode.h"
#include <sys/stat.h> #include <sys/stat.h>
#include <unordered_map>
#include <vector>
#include "mindspore/core/utils/log_adapter.h" #include "mindspore/core/utils/log_adapter.h"
#include "mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.h" #include "mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.h"
#include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h"
#include "minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h"
enum ModelLoadMethod { enum ModelLoadMethod {
LOAD_FROM_FILE = 0, // Loading from file, memory of model and weights are managed by ACL LOAD_FROM_FILE = 0, // Loading from file, memory of model and weights are managed by ACL

View File

@ -48,7 +48,7 @@ static void GetSobelKernel(float *kernel, int flag, int ksize, double scale) {
buffer[0] = 1, buffer[1] = -2, buffer[2] = 1; buffer[0] = 1, buffer[1] = -2, buffer[2] = 1;
} }
} else { } else {
int old, now; float old, now;
buffer[0] = 1; buffer[0] = 1;
for (int i = 0; i < ksize; i++) { for (int i = 0; i < ksize; i++) {
buffer[i + 1] = 0; buffer[i + 1] = 0;

View File

@ -571,9 +571,8 @@ bool ConvertTo(const LiteMat &src, LiteMat &dst, double scale) {
if (dst.IsEmpty()) { if (dst.IsEmpty()) {
dst.Init(src.width_, src.height_, src.channel_, LDataType::FLOAT32); dst.Init(src.width_, src.height_, src.channel_, LDataType::FLOAT32);
} else if (src.width_ != dst.width_ || src.height_ != dst.height_ || src.channel_ != dst.channel_) { } else if (src.width_ != dst.width_ || src.height_ != dst.height_ || src.channel_ != dst.channel_ ||
return false; dst.data_type_ != LDataType::FLOAT32) {
} else if (dst.data_type_ != LDataType::FLOAT32) {
return false; return false;
} }
@ -662,24 +661,16 @@ bool Crop(const LiteMat &src, LiteMat &dst, int x, int y, int w, int h) {
} }
static bool CheckZero(const std::vector<float> &vs) { static bool CheckZero(const std::vector<float> &vs) {
for (int i = 0; i < vs.size(); i++) { return std::any_of(vs.begin(), vs.end(), [](const float &v) { return Equal(v, 0.0f); });
if (Equal(vs[i], 0.0f)) {
return true;
}
}
return false;
} }
// Returns true when at least one element of vs is zero. An empty vector yields false.
static bool CheckZero(const std::vector<size_t> &vs) {
  // The predicate takes size_t so elements are compared as integers, with no implicit
  // integer-to-float conversion per element.
  return std::any_of(vs.begin(), vs.end(), [](size_t v) { return v == 0; });
}
static bool CheckMeanAndStd(const LiteMat &src, LiteMat &dst, int channel, const std::vector<float> &mean, static bool CheckMeanAndStd(const LiteMat &src, LiteMat &dst, int channel, const std::vector<float> &mean,
const std::vector<float> &std) { const std::vector<float> &std) {
if (mean.size() == 0 && std.size() == 0) { if (mean.empty() && std.empty()) {
return false; return false;
} }
if (src.data_type_ != LDataType::FLOAT32) { if (src.data_type_ != LDataType::FLOAT32) {
@ -935,8 +926,8 @@ bool Merge(const std::vector<LiteMat> &mv, LiteMat &dst) {
LDataType data_type = mv[0].data_type_; LDataType data_type = mv[0].data_type_;
// The arrays in list must be single-channel // The arrays in list must be single-channel
for (int i = 0; i < mv.size(); i++) { if (std::any_of(mv.begin(), mv.end(), [](const LiteMat &m) { return m.channel_ != 1; })) {
if (mv[i].channel_ != 1) return false; return false;
} }
for (int i = 1; i < mv.size(); i++) { for (int i = 1; i < mv.size(); i++) {
@ -998,7 +989,7 @@ bool Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int ri
return true; return true;
} }
std::vector<std::vector<float>> GetDefaultBoxes(BoxesConfig config) { std::vector<std::vector<float>> GetDefaultBoxes(const BoxesConfig config) {
size_t size = config.num_default.size(); size_t size = config.num_default.size();
if (size <= 1 || config.feature_size.size() != size || config.steps.size() != size || if (size <= 1 || config.feature_size.size() != size || config.steps.size() != size ||
config.aspect_rations.size() != size) { config.aspect_rations.size() != size) {
@ -1116,6 +1107,7 @@ std::vector<int> ApplyNms(const std::vector<std::vector<float>> &all_boxes, std:
} }
} }
std::vector<int> new_order; std::vector<int> new_order;
new_order.reserve(inds.size());
for (int k = 0; k < inds.size(); k++) { for (int k = 0; k < inds.size(); k++) {
new_order.push_back(order[inds[k]]); new_order.push_back(order[inds[k]]);
} }

View File

@ -283,9 +283,7 @@ void LiteMat::Release() {
if (data_ptr_) { if (data_ptr_) {
AlignFree(data_ptr_); AlignFree(data_ptr_);
} }
if (ref_count_) { delete[] ref_count_;
delete[] ref_count_;
}
} }
data_ptr_ = nullptr; data_ptr_ = nullptr;
elem_size_ = 0; elem_size_ = 0;
@ -293,7 +291,7 @@ void LiteMat::Release() {
height_ = 0; height_ = 0;
channel_ = 0; channel_ = 0;
c_step_ = 0; c_step_ = 0;
ref_count_ = 0; ref_count_ = nullptr;
size_ = 0; size_ = 0;
setSteps(0, 0, 0); setSteps(0, 0, 0);
} }
@ -418,7 +416,7 @@ inline void SubtractImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *d
} }
inline bool CheckSubstract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { inline bool CheckSubstract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
if (dst == NULL) { if (dst == nullptr) {
return false; return false;
} }
@ -426,10 +424,7 @@ inline bool CheckSubstract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *
return false; return false;
} }
if (src_a.data_type_ != src_b.data_type_) { return src_a.data_type_ == src_b.data_type_;
return false;
}
return true;
} }
bool Subtract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { bool Subtract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
@ -585,7 +580,7 @@ inline void DivideImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *dst
} }
inline bool CheckDivide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { inline bool CheckDivide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
if (dst == NULL) { if (dst == nullptr) {
return false; return false;
} }
@ -593,10 +588,7 @@ inline bool CheckDivide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst
return false; return false;
} }
if (src_a.data_type_ != src_b.data_type_) { return src_a.data_type_ == src_b.data_type_;
return false;
}
return true;
} }
bool Divide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { bool Divide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
@ -693,7 +685,7 @@ inline void MultiplyImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *d
} }
inline bool CheckMultiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { inline bool CheckMultiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
if (dst == NULL) { if (dst == nullptr) {
return false; return false;
} }
@ -701,10 +693,7 @@ inline bool CheckMultiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *d
return false; return false;
} }
if (src_a.data_type_ != src_b.data_type_) { return src_a.data_type_ == src_b.data_type_;
return false;
}
return true;
} }
bool Multiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { bool Multiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {

View File

@ -166,15 +166,9 @@ class LDataType {
~LDataType() = default; ~LDataType() = default;
inline Type Value() const { return type_; } inline Type Value() const { return type_; }
// Equality: true when both objects hold the same type_ value.
inline bool operator==(const LDataType &ps) const { return this->type_ == ps.type_; }
// Inequality: true when the two objects hold different type_ values.
inline bool operator!=(const LDataType &ps) const { return this->type_ != ps.type_; }
uint8_t SizeInBytes() const { uint8_t SizeInBytes() const {
if (type_ < LDataType::NUM_OF_TYPES) if (type_ < LDataType::NUM_OF_TYPES)

View File

@ -381,11 +381,9 @@ bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int
} }
if (dst.IsEmpty()) { if (dst.IsEmpty()) {
(void)dst.Init(dst_w, dst_h, src.channel_, LDataType::UINT8); (void)dst.Init(dst_w, dst_h, src.channel_, LDataType::UINT8);
} else if (dst.height_ != dst_h || dst.width_ != dst_w || dst.channel_ != src.channel_) { } else if (dst.height_ != dst_h || dst.width_ != dst_w || dst.channel_ != src.channel_ ||
dst.data_type_ != LDataType::UINT8) {
return false; return false;
} else if (dst.data_type_ != LDataType::UINT8) {
return false;
} else {
} }
double IM[6]; double IM[6];

View File

@ -182,6 +182,8 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
} catch (std::runtime_error &e) { } catch (std::runtime_error &e) {
return DestroyDecompressAndReturnError(e.what()); return DestroyDecompressAndReturnError(e.what());
} }
CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_w) > crop_x, "invalid crop width");
CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_h) > crop_y, "invalid crop height");
if (crop_x == 0 && crop_y == 0 && crop_w == 0 && crop_h == 0) { if (crop_x == 0 && crop_y == 0 && crop_w == 0 && crop_h == 0) {
crop_w = cinfo.output_width; crop_w = cinfo.output_width;
crop_h = cinfo.output_height; crop_h = cinfo.output_height;
@ -190,6 +192,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
return DestroyDecompressAndReturnError("Decode: invalid crop size"); return DestroyDecompressAndReturnError("Decode: invalid crop size");
} }
const int mcu_size = cinfo.min_DCT_scaled_size; const int mcu_size = cinfo.min_DCT_scaled_size;
CHECK_FAIL_RETURN_UNEXPECTED(mcu_size != 0, "Invalid data.");
unsigned int crop_x_aligned = (crop_x / mcu_size) * mcu_size; unsigned int crop_x_aligned = (crop_x / mcu_size) * mcu_size;
unsigned int crop_w_aligned = crop_w + crop_x - crop_x_aligned; unsigned int crop_w_aligned = crop_w + crop_x - crop_x_aligned;
try { try {
@ -206,8 +209,13 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
RETURN_IF_NOT_OK(Tensor::CreateEmpty(ts, DataType(DataType::DE_UINT8), &output_tensor)); RETURN_IF_NOT_OK(Tensor::CreateEmpty(ts, DataType(DataType::DE_UINT8), &output_tensor));
const int buffer_size = output_tensor->SizeInBytes(); const int buffer_size = output_tensor->SizeInBytes();
JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>())); JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>()));
// stride refers to output tensor, which has 3 components at most
CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - skipped_scanlines) > crop_h,
"Invalid crop height.");
const int max_scanlines_to_read = skipped_scanlines + crop_h; const int max_scanlines_to_read = skipped_scanlines + crop_h;
// stride refers to output tensor, which has 3 components at most // stride refers to output tensor, which has 3 components at most
CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() / crop_w) > kOutNumComponents,
"Invalid crop width.");
const int stride = crop_w * kOutNumComponents; const int stride = crop_w * kOutNumComponents;
// offset is calculated for scanlines read from the image, therefore // offset is calculated for scanlines read from the image, therefore
// has the same number of components as the image // has the same number of components as the image
@ -246,6 +254,8 @@ Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu
RETURN_STATUS_UNEXPECTED("Crop: image datatype is not float32 or uint8"); RETURN_STATUS_UNEXPECTED("Crop: image datatype is not float32 or uint8");
} }
CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - y) > h, "Invalid crop height.");
CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - x) > w, "Invalid crop width.");
// account for integer overflow // account for integer overflow
if (y < 0 || (y + h) > input->shape()[0] || (y + h) < 0) { if (y < 0 || (y + h) > input->shape()[0] || (y + h) < 0) {
RETURN_STATUS_UNEXPECTED( RETURN_STATUS_UNEXPECTED(
@ -410,7 +420,10 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
Status ResizePreserve(const TensorRow &inputs, int32_t height, int32_t width, int32_t img_orientation, Status ResizePreserve(const TensorRow &inputs, int32_t height, int32_t width, int32_t img_orientation,
TensorRow *outputs) { TensorRow *outputs) {
outputs->resize(3); outputs->resize(3);
CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() > 0,
"Invalid input, should greater than 0, but got " + std::to_string(inputs.size()));
std::shared_ptr<Tensor> input = inputs[0]; std::shared_ptr<Tensor> input = inputs[0];
CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 3, "Invalid input shape, should be greater than 3 dimensions.");
LiteMat lite_mat_src(input->shape()[1], input->shape()[0], input->shape()[2], LiteMat lite_mat_src(input->shape()[1], input->shape()[0], input->shape()[2],
const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())), const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
GetLiteCVDataType(input->type())); GetLiteCVDataType(input->type()));
@ -537,7 +550,15 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output
std::shared_ptr<Tensor> output_tensor; std::shared_ptr<Tensor> output_tensor;
CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.width_) > pad_left,
"Invalid pad width.");
CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.width_ + pad_left) > pad_right,
"Invalid pad width.");
int pad_width = lite_mat_rgb.width_ + pad_left + pad_right; int pad_width = lite_mat_rgb.width_ + pad_left + pad_right;
CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.height_) > pad_top,
"Invalid pad height.");
CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.height_ + pad_top) > pad_bottom,
"Invalid pad height.");
int pad_height = lite_mat_rgb.height_ + pad_top + pad_bottom; int pad_height = lite_mat_rgb.height_ + pad_top + pad_bottom;
TensorShape new_shape = TensorShape({pad_height, pad_width, input->shape()[2]}); TensorShape new_shape = TensorShape({pad_height, pad_width, input->shape()[2]});
RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), &output_tensor)); RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), &output_tensor));
@ -721,11 +742,13 @@ Status Affine(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
} }
int height = 0; int height = 0;
int width = 0; int width = 0;
CHECK_FAIL_RETURN_UNEXPECTED(mat.size() <= 6, "Invalid mat shape.");
double M[6] = {}; double M[6] = {};
for (int i = 0; i < mat.size(); i++) { for (int i = 0; i < mat.size(); i++) {
M[i] = static_cast<double>(mat[i]); M[i] = static_cast<double>(mat[i]);
} }
CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 3, "Invalid input shape, should be 3.");
LiteMat lite_mat_rgb(input->shape()[1], input->shape()[0], input->shape()[2], LiteMat lite_mat_rgb(input->shape()[1], input->shape()[0], input->shape()[2],
const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())), const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
GetLiteCVDataType(input->type())); GetLiteCVDataType(input->type()));

View File

@ -22,7 +22,7 @@
namespace mindspore { namespace mindspore {
namespace dataset { namespace dataset {
const int32_t ResizePreserveAROp::kDefImgorientation = 0; const int32_t ResizePreserveAROp::kDefImgOrientation = 0;
// Stores the requested height, width and image orientation in the corresponding members.
ResizePreserveAROp::ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation)
    : height_(height), width_(width), img_orientation_(img_orientation) {}

View File

@ -34,9 +34,9 @@ namespace dataset {
class ResizePreserveAROp : public TensorOp { class ResizePreserveAROp : public TensorOp {
public: public:
// Default values, also used by python_bindings.cc // Default values, also used by python_bindings.cc
static const int32_t kDefImgorientation; static const int32_t kDefImgOrientation;
ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation = kDefImgorientation); ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation = kDefImgOrientation);
~ResizePreserveAROp() override = default; ~ResizePreserveAROp() override = default;

View File

@ -35,9 +35,9 @@ Status ResizeWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) {
int32_t input_w = input[0]->shape()[1]; int32_t input_w = input[0]->shape()[1];
output->resize(2); output->resize(2);
(*output)[1] = std::move(input[1]); // move boxes over to output (*output)[1] = input[1]; // move boxes over to output
std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input[0])); std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input[0]);
RETURN_IF_NOT_OK(ResizeOp::Compute(std::static_pointer_cast<Tensor>(input_cv), &(*output)[0])); RETURN_IF_NOT_OK(ResizeOp::Compute(std::static_pointer_cast<Tensor>(input_cv), &(*output)[0]));

View File

@ -29,7 +29,7 @@ namespace mindspore {
namespace dataset { namespace dataset {
class RgbaToBgrOp : public TensorOp { class RgbaToBgrOp : public TensorOp {
public: public:
RgbaToBgrOp() {} RgbaToBgrOp() = default;
~RgbaToBgrOp() override = default; ~RgbaToBgrOp() override = default;

View File

@ -29,7 +29,7 @@ namespace mindspore {
namespace dataset { namespace dataset {
class RgbaToRgbOp : public TensorOp { class RgbaToRgbOp : public TensorOp {
public: public:
RgbaToRgbOp() {} RgbaToRgbOp() = default;
~RgbaToRgbOp() override = default; ~RgbaToRgbOp() override = default;

View File

@ -42,9 +42,10 @@ Status SharpnessOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt
/// 1, 5, 1, /// 1, 5, 1,
/// 1, 1, 1 /// 1, 1, 1
float filterSum = 13.0; const float filterMid = 5.0;
const float filterSum = 13.0;
cv::Mat filter = cv::Mat(3, 3, CV_32F, cv::Scalar::all(1.0 / filterSum)); cv::Mat filter = cv::Mat(3, 3, CV_32F, cv::Scalar::all(1.0 / filterSum));
filter.at<float>(1, 1) = 5.0 / filterSum; filter.at<float>(1, 1) = filterMid / filterSum;
/// applying filter on channels /// applying filter on channels
cv::Mat result = cv::Mat(); cv::Mat result = cv::Mat();

View File

@ -57,7 +57,7 @@ Status SoftDvppDecodeRandomCropResizeJpegOp::Compute(const std::shared_ptr<Tenso
SoftDpCropInfo crop_info; SoftDpCropInfo crop_info;
RETURN_IF_NOT_OK(GetCropInfo(input, &crop_info)); RETURN_IF_NOT_OK(GetCropInfo(input, &crop_info));
try { try {
unsigned char *buffer = const_cast<unsigned char *>(input->GetBuffer()); auto buffer = const_cast<unsigned char *>(input->GetBuffer());
CHECK_FAIL_RETURN_UNEXPECTED(buffer != nullptr, CHECK_FAIL_RETURN_UNEXPECTED(buffer != nullptr,
"SoftDvppDecodeRandomCropResizeJpeg: the input image buffer is empty."); "SoftDvppDecodeRandomCropResizeJpeg: the input image buffer is empty.");
SoftDpProcsessInfo info; SoftDpProcsessInfo info;

View File

@ -21,9 +21,9 @@
#include <random> #include <random>
#include <string> #include <string>
#include "./utils/external_soft_dp.h"
#include "minddata/dataset/core/tensor.h" #include "minddata/dataset/core/tensor.h"
#include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" #include "minddata/dataset/kernels/image/random_crop_and_resize_op.h"
#include "minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h"
#include "minddata/dataset/util/status.h" #include "minddata/dataset/util/status.h"
namespace mindspore { namespace mindspore {

View File

@ -32,7 +32,7 @@ class SoftDvppDecodeResizeJpegOp : public TensorOp {
: target_height_(target_height), target_width_(target_width) {} : target_height_(target_height), target_width_(target_width) {}
/// \brief Destructor /// \brief Destructor
~SoftDvppDecodeResizeJpegOp() = default; ~SoftDvppDecodeResizeJpegOp() override = default;
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override; Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override; Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;

View File

@ -17,7 +17,7 @@
#ifndef EXTERNAL_SOFTDP_H #ifndef EXTERNAL_SOFTDP_H
#define EXTERNAL_SOFTDP_H #define EXTERNAL_SOFTDP_H
#include <stdint.h> #include <cstdint>
struct SoftDpProcsessInfo { struct SoftDpProcsessInfo {
uint8_t *input_buffer; // input buffer uint8_t *input_buffer; // input buffer

View File

@ -44,11 +44,10 @@ uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo *soft_dp_process_info) {
} }
// use vpc interface to resize and convert RGB, give user output buf and output size. // use vpc interface to resize and convert RGB, give user output buf and output size.
SoftDpCropInfo crop; auto crop = SoftDpCropInfo{.left = 0,
crop.left = 0; .right = static_cast<uint32_t>(vpc_input_info.real_width - 1),
crop.right = vpc_input_info.real_width - 1; .up = 0,
crop.up = 0; .down = static_cast<uint32_t>(vpc_input_info.real_height - 1)};
crop.down = vpc_input_info.real_height - 1;
VpcInfo output; VpcInfo output;
output.addr = soft_dp_process_info->output_buffer; output.addr = soft_dp_process_info->output_buffer;

View File

@ -17,8 +17,8 @@
#ifndef SOFT_DP_H #ifndef SOFT_DP_H
#define SOFT_DP_H #define SOFT_DP_H
#include <stdint.h> #include <cstdint>
#include "./external_soft_dp.h" #include "minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h"
enum JpegdToVpcFormat { enum JpegdToVpcFormat {
INPUT_VPC_UNKNOWN = -1, INPUT_VPC_UNKNOWN = -1,

View File

@ -25,11 +25,10 @@
#define DP_EVENT 0x10000 #define DP_EVENT 0x10000
#define DP_DEBUG_LEVEL (DP_EVENT | DP_ERR | DP_WARNING | DP_INFO | DP_DEBUG) #define DP_DEBUG_LEVEL (DP_EVENT | DP_ERR | DP_WARNING | DP_INFO | DP_DEBUG)
#include <vector>
#include <string>
#if defined(DVPP_UTST) || defined(DEBUG) #if defined(DVPP_UTST) || defined(DEBUG)
#include <stdio.h> #include <stdio.h>
#include <string>
#include <vector>
#define DP_LOG(model, level, format, ...) \ #define DP_LOG(model, level, format, ...) \
do { \ do { \
@ -67,6 +66,8 @@
#include <securec.h> #include <securec.h>
#include <cstdio> #include <cstdio>
#include <vector>
#include <string>
#include "glog/logging.h" #include "glog/logging.h"
template <typename... Args> template <typename... Args>

View File

@ -48,9 +48,5 @@ bool IsDirectory(const std::string &path) {
return false; return false;
} }
if (S_ISDIR(buf.st_mode)) { return S_ISDIR(buf.st_mode);
return true;
} else {
return false;
}
} }

View File

@ -40,11 +40,7 @@ T1 AlignDown(T1 num, T2 align) {
// Returns true when num lies within the closed range [left_point, right_point],
// i.e. num >= left_point and num <= right_point; both end points are included.
template <typename T>
bool IsInTheScope(T num, T left_point, T right_point) {
  return num >= left_point && num <= right_point;
}
template <typename T> template <typename T>

View File

@ -109,19 +109,19 @@ int32_t SoftVpc::CheckParamter() {
uint32_t out_width = out_width_; uint32_t out_width = out_width_;
uint32_t out_height = out_height_; uint32_t out_height = out_height_;
bool flag = (out_width * 32 >= crop_width) ? true : false; // A maximum of 32x zoom-out bool flag = (out_width * 32 >= crop_width); // A maximum of 32x zoom-out
VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail, VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail,
"Max reduction multiple is 32. Please check left(%u), right(%u), out_width(%u).", "Max reduction multiple is 32. Please check left(%u), right(%u), out_width(%u).",
left_, right_, out_width); // Up to 16x magnification left_, right_, out_width); // Up to 16x magnification
flag = (crop_width * 16 >= out_width) ? true : false; flag = (crop_width * 16 >= out_width);
VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail, VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail,
"Max magnification is 16. Please check left(%u), right(%u), out_width(%u).", left_, "Max magnification is 16. Please check left(%u), right(%u), out_width(%u).", left_,
right_, out_width); right_, out_width);
flag = (out_height * 32 >= crop_height) ? true : false; // A maximum of 32x zoom-out flag = (out_height * 32 >= crop_height); // A maximum of 32x zoom-out
VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail, VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail,
"Max reduction multiple is 32. Please check up(%u), down(%u), out_height(%u).", up_, "Max reduction multiple is 32. Please check up(%u), down(%u), out_height(%u).", up_,
down_, out_height); down_, out_height);
flag = (crop_height * 16 >= out_height) ? true : false; // Up to 16x magnification flag = (crop_height * 16 >= out_height); // Up to 16x magnification
VPC_CHECK_COND_FAIL_PRINT_RETURN( VPC_CHECK_COND_FAIL_PRINT_RETURN(
flag, dpFail, "Max magnification is 16. Please check up(%u), down(%u), out_height(%u).", up_, down_, out_height); flag, dpFail, "Max magnification is 16. Please check up(%u), down(%u), out_height(%u).", up_, down_, out_height);
return dpSucc; return dpSucc;

View File

@ -34,7 +34,7 @@ class SoftVpc {
public: public:
SoftVpc(); SoftVpc();
~SoftVpc() {} ~SoftVpc() = default;
/* /*
* @brief : vpc Cropping and Scaling APIs. * @brief : vpc Cropping and Scaling APIs.

View File

@ -75,7 +75,7 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW
// taps_4, the second character in the square brackets is the start address of the array block. // taps_4, the second character in the square brackets is the start address of the array block.
if ((*flag_ctl - initBracketNum) % arrTypeNum == 2) { if ((*flag_ctl - initBracketNum) % arrTypeNum == 2) {
while (1) { while (true) {
ss >> yuv_scaler_paraset->scale[cnt].taps_4[index->first_index++]; ss >> yuv_scaler_paraset->scale[cnt].taps_4[index->first_index++];
if (ss.fail()) { // rerad failed. if (ss.fail()) { // rerad failed.
index->first_index = index->first_index - 1; index->first_index = index->first_index - 1;
@ -94,7 +94,7 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW
// taps_6 // taps_6
if ((*flag_ctl - initBracketNum) % arrTypeNum == 0) { if ((*flag_ctl - initBracketNum) % arrTypeNum == 0) {
while (1) { while (true) {
ss >> yuv_scaler_paraset->scale[cnt].taps_6[index->second_index++]; ss >> yuv_scaler_paraset->scale[cnt].taps_6[index->second_index++];
if (ss.fail()) { // read failed. if (ss.fail()) { // read failed.
index->second_index = index->second_index - 1; index->second_index = index->second_index - 1;
@ -115,7 +115,6 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW
} }
int32_t CheckParamater(std::pair<bool, std::string> rlt, uint32_t i) { int32_t CheckParamater(std::pair<bool, std::string> rlt, uint32_t i) {
int32_t ret = dpSucc;
if (rlt.first == false) { if (rlt.first == false) {
API_LOGE("Get real path failed. index = %u", i); API_LOGE("Get real path failed. index = %u", i);
return dpFail; return dpFail;
@ -126,7 +125,7 @@ int32_t CheckParamater(std::pair<bool, std::string> rlt, uint32_t i) {
return dpFail; return dpFail;
} }
return ret; return dpSucc;
} }
// Read the parameter set file and skip the comments in the file. // Read the parameter set file and skip the comments in the file.
@ -177,7 +176,7 @@ int32_t ParseFileToVar(const std::string *para_set_name, uint32_t yuv_scaler_par
} }
// cale the number of "{",check the location of the data. // cale the number of "{",check the location of the data.
if (str_line.find("{") != std::string::npos) { if (str_line.find('{') != std::string::npos) {
flag_ctl++; flag_ctl++;
flag_tap = 1; flag_tap = 1;
} }

View File

@ -19,6 +19,7 @@
#include <memory> #include <memory>
#include <string> #include <string>
#include <utility>
#include <vector> #include <vector>
#include "minddata/dataset/core/tensor.h" #include "minddata/dataset/core/tensor.h"
@ -29,9 +30,9 @@ namespace mindspore {
namespace dataset { namespace dataset {
class SolarizeOp : public TensorOp { class SolarizeOp : public TensorOp {
public: public:
explicit SolarizeOp(std::vector<uint8_t> threshold = {0, 255}) : threshold_(threshold) {} explicit SolarizeOp(std::vector<uint8_t> threshold = {0, 255}) : threshold_(std::move(threshold)) {}
~SolarizeOp() = default; ~SolarizeOp() override = default;
Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override; Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;

View File

@ -30,7 +30,7 @@ namespace dataset {
class SwapRedBlueOp : public TensorOp { class SwapRedBlueOp : public TensorOp {
public: public:
/// \brief Constructor /// \brief Constructor
SwapRedBlueOp() {} SwapRedBlueOp() = default;
SwapRedBlueOp(const SwapRedBlueOp &rhs) = default; SwapRedBlueOp(const SwapRedBlueOp &rhs) = default;

View File

@ -22,7 +22,7 @@ namespace dataset {
const int UniformAugOp::kDefNumOps = 2; const int UniformAugOp::kDefNumOps = 2;
UniformAugOp::UniformAugOp(std::vector<std::shared_ptr<TensorOp>> op_list, int32_t num_ops) UniformAugOp::UniformAugOp(std::vector<std::shared_ptr<TensorOp>> op_list, int32_t num_ops)
: tensor_op_list_(op_list), num_ops_(num_ops) { : tensor_op_list_(std::move(op_list)), num_ops_(num_ops) {
rnd_.seed(GetSeed()); rnd_.seed(GetSeed());
} }

Some files were not shown because too many files have changed in this diff Show More