diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc index e9579ea1eb3..1f6a6a35f79 100644 --- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc +++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc @@ -593,14 +593,16 @@ SchemaObj::SchemaObj(const std::vector &schema_file) : data_(std::make_sha // SchemaObj Init function Status SchemaObj::Init() { - if (!data_->schema_file_.empty()) { - Path schema_file(data_->schema_file_); + if (data_ != nullptr && !data_->schema_file_.empty()) { + std::string real_path; + RETURN_IF_NOT_OK(Path::RealPath(data_->schema_file_, real_path)); + Path schema_file(real_path); CHECK_FAIL_RETURN_UNEXPECTED(schema_file.Exists(), "The file " + data_->schema_file_ + " does not exist or permission denied!"); nlohmann::json js; try { - std::ifstream in(data_->schema_file_); + std::ifstream in(real_path); in >> js; CHECK_FAIL_RETURN_UNEXPECTED(js.find("columns") != js.end(), "\"columns\" node is required in the schema json file."); diff --git a/mindspore/ccsrc/minddata/dataset/api/iterator.cc b/mindspore/ccsrc/minddata/dataset/api/iterator.cc index cb23e9395fe..3de7f6bd8fe 100644 --- a/mindspore/ccsrc/minddata/dataset/api/iterator.cc +++ b/mindspore/ccsrc/minddata/dataset/api/iterator.cc @@ -27,7 +27,8 @@ Iterator::~Iterator() { Stop(); } // Get the next row from the data pipeline. Status Iterator::GetNextRowCharIF(MSTensorMapChar *row) { - // Clean data row + RETURN_UNEXPECTED_IF_NULL(row); + // Clean data buffer row->clear(); std::unordered_map> md_map; Status rc = consumer_->GetNextAsMap(&md_map); @@ -47,6 +48,7 @@ Status Iterator::GetNextRowCharIF(MSTensorMapChar *row) { // Get the next row from the data pipeline. Status Iterator::GetNextRow(MSTensorVec *row) { // Clean data row + RETURN_UNEXPECTED_IF_NULL(row); row->clear(); // create a dataset tensor row and fetch. Then we convert the output to MSTensor std::vector> md_row; @@ -84,6 +86,7 @@ Status Iterator::BuildAndLaunchTree(std::shared_ptr ds, int32_t num_epo PullIterator::PullIterator() : pull_consumer_(nullptr) {} // Get the next row from the data pipeline. 
Status PullIterator::GetRows(int32_t num_rows, std::vector *const row) { + RETURN_UNEXPECTED_IF_NULL(row); for (int i = 0; i < num_rows; i++) { std::vector> md_row; Status rc = pull_consumer_->GetNextAsVector(&md_row); @@ -105,6 +108,7 @@ Status PullIterator::GetRows(int32_t num_rows, std::vector *const r } Status PullIterator::GetNextRow(MSTensorVec *const row) { + RETURN_UNEXPECTED_IF_NULL(row); CHECK_FAIL_RETURN_UNEXPECTED(pull_consumer_ != nullptr, "Consumer is nullptr."); std::vector> md_row; Status rc = pull_consumer_->GetNextAsVector(&md_row); diff --git a/mindspore/ccsrc/minddata/dataset/core/config_manager.cc b/mindspore/ccsrc/minddata/dataset/core/config_manager.cc index 5154812253b..71d285bdaca 100644 --- a/mindspore/ccsrc/minddata/dataset/core/config_manager.cc +++ b/mindspore/ccsrc/minddata/dataset/core/config_manager.cc @@ -107,6 +107,7 @@ Status ConfigManager::LoadFile(const std::string &settingsFile) { nlohmann::json js; in >> js; rc = FromJson(js); + in.close(); } catch (const nlohmann::json::type_error &e) { std::ostringstream ss; ss << "Client file failed to load:\n" << e.what(); diff --git a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc index 052a585eb1d..48980fb929a 100644 --- a/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/cv_tensor.cc @@ -29,8 +29,10 @@ CVTensor::CVTensor(std::shared_ptr tensor) : Tensor(std::move(*tensor)) } Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPtr *out) { + RETURN_UNEXPECTED_IF_NULL(out); const CVTensorAlloc *alloc = GlobalContext::Instance()->cv_tensor_allocator(); *out = std::allocate_shared(*alloc, shape, type); + RETURN_UNEXPECTED_IF_NULL(out); int64_t byte_size = (*out)->SizeInBytes(); // Don't allocate if we have a tensor with no elements. if (byte_size != 0) { @@ -41,6 +43,7 @@ Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPt } Status CVTensor::CreateFromMat(const cv::Mat &mat, const dsize_t rank, CVTensorPtr *out) { + RETURN_UNEXPECTED_IF_NULL(out); TensorPtr out_tensor; cv::Mat mat_local = mat; // if the input Mat's memory is not continuous, copy it to one block of memory @@ -78,6 +81,9 @@ std::pair, int> CVTensor::IsValidImage(const TensorShape &sha } std::shared_ptr CVTensor::AsCVTensor(std::shared_ptr t) { + if (t == nullptr) { + return nullptr; + } std::shared_ptr cv_t = std::dynamic_pointer_cast(t); if (cv_t != nullptr) { return cv_t; @@ -88,13 +94,13 @@ std::shared_ptr CVTensor::AsCVTensor(std::shared_ptr t) { } Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType &type, cv::Mat *mat) { - std::pair, int> cv_shape_type = IsValidImage(shape, type); + RETURN_UNEXPECTED_IF_NULL(data); + RETURN_UNEXPECTED_IF_NULL(mat); + const int kShapeAsDefault = 2; + std::pair, int> cv_shape_type = IsValidImage(shape, type); if (cv_shape_type.second == -1) { std::vector sizes = shape.AsVector(); std::vector sizes32(sizes.begin(), sizes.end()); // convert long to int for usage with OpenCV - if (static_cast(shape.Rank()) != shape.Rank()) { - RETURN_STATUS_UNEXPECTED("Error in creating CV mat. 
Wrong shape."); - } uint8_t cv_type = type.AsCVType(); if (cv_type == kCVInvalidType) { @@ -102,7 +108,7 @@ Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType & } *mat = cv::Mat(static_cast(shape.Rank()), &sizes32[0], cv_type, data); } else { - *mat = cv::Mat(2, &(cv_shape_type.first[0]), cv_shape_type.second, data); + *mat = cv::Mat(kShapeAsDefault, &(cv_shape_type.first[0]), cv_shape_type.second, data); } return Status::OK(); } @@ -121,10 +127,14 @@ Status CVTensor::ExpandDim(const dsize_t &axis) { void CVTensor::Squeeze() { Tensor::Squeeze(); - (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_); + Status rc = this->MatInit(GetMutableBuffer(), shape_, type_, &mat_); + if (rc.IsError()) { + MS_LOG(ERROR) << "Squeeze failed, error details is " << rc; + } } Status CVTensor::MatAtIndex(const std::vector &index, cv::Mat *mat) { + RETURN_UNEXPECTED_IF_NULL(mat); uchar *start = nullptr; TensorShape remaining({-1}); RETURN_IF_NOT_OK(this->StartAddrOfIndex(index, &start, &remaining)); diff --git a/mindspore/ccsrc/minddata/dataset/core/data_type.h b/mindspore/ccsrc/minddata/dataset/core/data_type.h index c5621df60dd..1ac5443d15d 100644 --- a/mindspore/ccsrc/minddata/dataset/core/data_type.h +++ b/mindspore/ccsrc/minddata/dataset/core/data_type.h @@ -143,15 +143,15 @@ class DataType { constexpr bool operator!=(const Type a) const { return type_ != a; } // Disable this usage `if(d)` where d is of type DataType - /// \return + /// \return return nothing since we deiable this function. operator bool() = delete; // To be used in Switch/case - /// \return + /// \return data type internal. operator Type() const { return type_; } // The number of bytes needed to store one value of this type - /// \return + /// \return the number of bytes of the type. uint8_t SizeInBytes() const; #ifndef ENABLE_ANDROID diff --git a/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc index 957e4c763cd..73fba5c6cb7 100644 --- a/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/de_tensor.cc @@ -41,15 +41,17 @@ DETensor::DETensor(std::shared_ptr device_tensor_impl, bo : device_tensor_impl_(device_tensor_impl), name_("MindDataDeviceTensor"), is_device_(is_device) { // The sequence of shape_ is (width, widthStride, height, heightStride) in Dvpp module // We need to add [1]widthStride and [3]heightStride, which are actual YUV image shape, into shape_ attribute - uint8_t flag = 0; - for (auto &i : device_tensor_impl->GetYuvStrideShape()) { - if (flag % 2 == 1) { - int64_t j = static_cast(i); - shape_.emplace_back(j); + if (device_tensor_impl && device_tensor_impl->GetYuvStrideShape().size() > 0) { + uint8_t flag = 0; + for (auto &i : device_tensor_impl->GetYuvStrideShape()) { + if (flag % 2 == 1) { + int64_t j = static_cast(i); + shape_.emplace_back(j); + } + ++flag; } - ++flag; + std::reverse(shape_.begin(), shape_.end()); } - std::reverse(shape_.begin(), shape_.end()); MS_LOG(INFO) << "This is a YUV420 format image, one pixel takes 1.5 bytes. Therefore, the shape of" << " image is in (H, W) format. 
You can search for more information about YUV420 format"; } diff --git a/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc b/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc index b12177b8694..80cfc532a24 100644 --- a/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/device_tensor.cc @@ -23,7 +23,10 @@ namespace mindspore { namespace dataset { -DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) : Tensor(shape, type) { +const int kYuvDefaultChannels = 4; + +DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) + : Tensor(shape, type), device_data_(nullptr), size_(0) { // grab the mem pool from global context and create the allocator for char data area std::shared_ptr global_pool = GlobalContext::Instance()->mem_pool(); data_allocator_ = std::make_unique>(global_pool); @@ -34,6 +37,7 @@ DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) : Ten Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type, std::shared_ptr *out) { CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape."); CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type."); + CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Invalid nullptr pointer."); const DeviceTensorAlloc *alloc = GlobalContext::Instance()->device_tensor_allocator(); *out = std::allocate_shared(*alloc, shape, type); // if it's a string tensor and it has no elements, Just initialize the shape and type. @@ -42,6 +46,7 @@ Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type, } CHECK_FAIL_RETURN_UNEXPECTED(type.IsNumeric(), "Number of elements is not 0. The type should be numeric."); + CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed."); int64_t bytes = (*out)->SizeInBytes(); // Don't allocate if we have a tensor with no elements. @@ -58,9 +63,11 @@ Status DeviceTensor::CreateFromDeviceMemory(const TensorShape &shape, const Data CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type."); CHECK_FAIL_RETURN_UNEXPECTED(data_ptr != nullptr, "Data pointer is NULL"); CHECK_FAIL_RETURN_UNEXPECTED(dataSize > 0, "Invalid data size"); + CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Out pointer is NULL"); const DeviceTensorAlloc *alloc = GlobalContext::Instance()->device_tensor_allocator(); *out = std::allocate_shared(*alloc, shape, type); + CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed."); // if it's a string tensor and it has no elements, Just initialize the shape and type.
if (!type.IsNumeric() && shape.NumOfElements() == 0) { @@ -76,6 +83,8 @@ Status DeviceTensor::CreateFromDeviceMemory(const TensorShape &shape, const Data RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size)); } + CHECK_FAIL_RETURN_UNEXPECTED(attributes.size() >= kYuvDefaultChannels, + "Invalid attributes size, should be no less than 4."); CHECK_FAIL_RETURN_UNEXPECTED( (*out)->SetAttributes(data_ptr, dataSize, attributes[0], attributes[1], attributes[2], attributes[3]), "Fail to set attributes for DeviceTensor"); @@ -129,6 +138,7 @@ Status DeviceTensor::SetSize_(const uint32_t &new_size) { #ifdef ENABLE_ACL Status DeviceTensor::DataPop_(std::shared_ptr *host_tensor) { + CHECK_FAIL_RETURN_UNEXPECTED(host_tensor != nullptr, "host tensor pointer is NULL."); void *resHostBuf = nullptr; APP_ERROR ret = aclrtMallocHost(&resHostBuf, this->DeviceDataSize()); if (ret != APP_ERR_OK) { @@ -151,13 +161,18 @@ Status DeviceTensor::DataPop_(std::shared_ptr *host_tensor) { mindspore::dataset::dsize_t dvppDataSize = this->DeviceDataSize(); const mindspore::dataset::TensorShape dvpp_shape({dvppDataSize, 1, 1}); + + CHECK_FAIL_RETURN_UNEXPECTED(this->GetYuvStrideShape().size() >= kYuvDefaultChannels, + "Invalid YuvShape, should be no less than 4"); + uint32_t _output_width_ = this->GetYuvStrideShape()[0]; uint32_t _output_widthStride_ = this->GetYuvStrideShape()[1]; uint32_t _output_height_ = this->GetYuvStrideShape()[2]; uint32_t _output_heightStride_ = this->GetYuvStrideShape()[3]; const mindspore::dataset::DataType dvpp_data_type(mindspore::dataset::DataType::DE_UINT8); - mindspore::dataset::Tensor::CreateFromMemory(dvpp_shape, dvpp_data_type, ret_ptr, host_tensor); + RETURN_IF_NOT_OK(mindspore::dataset::Tensor::CreateFromMemory(dvpp_shape, dvpp_data_type, ret_ptr, host_tensor)); + CHECK_FAIL_RETURN_UNEXPECTED(host_tensor != nullptr, "Allocate memory failed."); (*host_tensor)->SetYuvShape(_output_width_, _output_widthStride_, _output_height_, _output_heightStride_); if (!(*host_tensor)->HasData()) { diff --git a/mindspore/ccsrc/minddata/dataset/core/pybind_support.h b/mindspore/ccsrc/minddata/dataset/core/pybind_support.h index 7a553b9fef8..85e1c6d5f41 100644 --- a/mindspore/ccsrc/minddata/dataset/core/pybind_support.h +++ b/mindspore/ccsrc/minddata/dataset/core/pybind_support.h @@ -39,7 +39,9 @@ struct npy_scalar_caster { bool load(handle src, bool convert) { // Taken from Eigen casters. Permits either scalar dtype or scalar array. handle type = dtype::of().attr("type"); // Could make more efficient.
- if (!convert && !isinstance(src) && !isinstance(src, type)) return false; + if (!convert && !isinstance(src) && !isinstance(src, type)) { + return false; + } Array tmp = Array::ensure(src); if (tmp && tmp.size() == 1 && tmp.ndim() == 0) { diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor.cc b/mindspore/ccsrc/minddata/dataset/core/tensor.cc index 315ce87ed84..95c96864d46 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor.cc @@ -91,8 +91,10 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept { Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out) { CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape."); CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type."); + RETURN_UNEXPECTED_IF_NULL(out); const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); *out = std::allocate_shared(*alloc, shape, type); + CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed."); // if it's a string tensor and it has no elements, Just initialize the shape and type. if (!type.IsNumeric() && shape.NumOfElements() == 0) { return Status::OK(); @@ -110,7 +112,7 @@ Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, Tenso } Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) { RETURN_IF_NOT_OK(CreateEmpty(shape, type, out)); - if (src != nullptr) { + if (src != nullptr && out != nullptr) { // Given the shape/type of this tensor, compute the data size and copy in the input bytes. int64_t byte_size = (*out)->SizeInBytes(); if (byte_size == 0) { @@ -129,9 +131,11 @@ Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const unsigned char *src, const dsize_t &length, TensorPtr *out) { - CHECK_FAIL_RETURN_UNEXPECTED(src != nullptr, "Pointer to source data is null."); + RETURN_UNEXPECTED_IF_NULL(src); + RETURN_UNEXPECTED_IF_NULL(out); const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); *out = std::allocate_shared(*alloc, shape, type); + CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed."); if (type.IsNumeric()) { dsize_t calculated_length = (*out)->SizeInBytes(); CHECK_FAIL_RETURN_UNEXPECTED(calculated_length == length, "Length of source data does not match the shape."); @@ -159,6 +163,7 @@ Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, #ifdef ENABLE_PYTHON Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr *out) { + RETURN_UNEXPECTED_IF_NULL(out); std::vector shape; for (dsize_t i = 0; i < arr.ndim(); i++) { shape.push_back(static_cast(arr.shape()[i])); @@ -167,9 +172,11 @@ Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr *out) { std::vector strings; if (arr.dtype().kind() == 'U') { - std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast(s)); }); + (void)std::for_each(arr.begin(), arr.end(), + [&strings](const auto &s) { strings.emplace_back(py::cast(s)); }); } else { - std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast(s)); }); + (void)std::for_each(arr.begin(), arr.end(), + [&strings](const auto &s) { strings.emplace_back(py::cast(s)); }); } arr.resize(shape); // resize arr back to the original shape @@ -178,6 +185,7 @@ Status Tensor::CreateFromNpString(py::array arr, 
std::shared_ptr *out) { } Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr *out) { + RETURN_UNEXPECTED_IF_NULL(out); if (DataType::FromNpArray(arr) == DataType::DE_STRING) { return CreateFromNpString(arr, out); } @@ -191,7 +199,7 @@ Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr * shape.push_back(static_cast(arr.shape()[i])); strides.push_back(static_cast(arr.strides()[i])); // in case of empty array num_items=0 - if (count != 0) { + if (count != 0 && shape.size() > i && shape[i] != 0) { count /= shape[i]; if (strides[i] != arr.itemsize() * count) { is_strided = true; @@ -213,9 +221,11 @@ Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr * #ifndef ENABLE_ANDROID Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out) { + RETURN_UNEXPECTED_IF_NULL(out); const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); *out = std::allocate_shared(*alloc, TensorShape({static_cast(bytes_list.value_size())}), DataType(DataType::DE_STRING)); + CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed."); // total bytes needed = offset array + strings // offset array needs to store one offset var per element + 1 extra to get the length of the last string. // strings will be null-terminated --> need 1 extra byte per element @@ -236,9 +246,7 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const num_bytes -= kOffsetSize; // insert actual string int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1); - if (ret_code != 0) { - MS_LOG(ERROR) << "Cannot copy string into Tensor"; - } + CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Cannot copy string into Tensor"); // next string will be stored right after the current one. 
offset = offset + str.length() + 1; // total bytes are reduced by the length of the string @@ -257,6 +265,7 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const #endif Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr *out) { + RETURN_UNEXPECTED_IF_NULL(out); Path file(path); if (file.IsDirectory()) { RETURN_STATUS_UNEXPECTED("Invalid file found: " + path + ", should be file, but got directory."); @@ -269,8 +278,10 @@ Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr * CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Failed to find size of file, check path: " + path); RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out)); int64_t written_bytes = fs.read(reinterpret_cast((*out)->GetMutableBuffer()), num_bytes).gcount(); - CHECK_FAIL_RETURN_UNEXPECTED(written_bytes == num_bytes && fs.good(), - "Error in writing to tensor, check path: " + path); + if (!(written_bytes == num_bytes && fs.good())) { + fs.close(); + RETURN_STATUS_UNEXPECTED("Error in writing to tensor, check path: " + path); + } fs.close(); return Status::OK(); } @@ -278,8 +289,10 @@ Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr * #ifndef ENABLE_ANDROID Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, const DataType &type, dsize_t pad_size, TensorPtr *out) { + RETURN_UNEXPECTED_IF_NULL(out); RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, type, out)); + RETURN_UNEXPECTED_IF_NULL(out); unsigned char *current_tensor_addr = (*out)->GetMutableBuffer(); int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size; @@ -313,18 +326,23 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const // Here we convert array C to array A, by memcpy index by index (Note that not all elements in C is copied) Status Tensor::CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector shape, std::vector strides, uint8_t type_size) { + RETURN_UNEXPECTED_IF_NULL(dst); + RETURN_UNEXPECTED_IF_NULL(src); dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>()); for (dsize_t i = 0; i < size; ++i) { dsize_t offset = 0; dsize_t count = i; for (size_t j = 0; j < shape.size(); ++j) { // convert 1d array's index to 3d array's index (A -> B) + CHECK_FAIL_RETURN_UNEXPECTED(shape[shape.size() - 1 - j] != 0, "Invalid data, shape can't be zero."); dsize_t idx = count % shape[shape.size() - 1 - j]; count /= shape[shape.size() - 1 - j]; // calculate the raw data offset based on strides (B -> C) offset += idx * strides[shape.size() - 1 - j]; // once count = 0, the following idxes are all zero, skip them - if (count == 0) break; + if (count == 0) { + break; + } } // strides already consider byte size of the data type, but dst doesn't. 
// dst[i] = dst + i * type_size = src + offset @@ -482,6 +500,7 @@ void Tensor::Invalidate() { template Status Tensor::GetItemPtr(T **ptr, const std::vector &index) const { + RETURN_UNEXPECTED_IF_NULL(ptr); if (type_.IsCompatible()) { if (data_ == nullptr) { std::string err = "Data is not allocated yet"; @@ -490,6 +509,7 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector &index) const { dsize_t flat_idx; RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx)); *ptr = reinterpret_cast(data_ + flat_idx * type_.SizeInBytes()); + RETURN_UNEXPECTED_IF_NULL(ptr); return Status::OK(); } else { @@ -499,6 +519,8 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector &index) const { } Status Tensor::GetItemPtr(uchar **ptr, const std::vector &index, offset_t *length) const { + RETURN_UNEXPECTED_IF_NULL(ptr); + RETURN_UNEXPECTED_IF_NULL(length); if (type_ == DataType::DE_STRING) { if (data_ == nullptr) { std::string err = "Data is not allocated yet"; @@ -519,6 +541,8 @@ Status Tensor::GetItemPtr(uchar **ptr, const std::vector &index, offset } Status Tensor::StartAddrOfIndex(std::vector ind, uchar **start_addr_of_index, TensorShape *remaining) { + RETURN_UNEXPECTED_IF_NULL(start_addr_of_index); + RETURN_UNEXPECTED_IF_NULL(remaining); if (type() == DataType::DE_STRING) { RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string tensors yet."); } @@ -541,6 +565,7 @@ Status Tensor::StartAddrOfIndex(std::vector ind, uchar **start_addr_of_ Status Tensor::InsertTensor(const std::vector &ind, const std::shared_ptr &tensor, const bool partial_insert) { + RETURN_UNEXPECTED_IF_NULL(tensor); std::string err_msg; if (partial_insert) { err_msg += (ind.size() != 1) @@ -603,13 +628,14 @@ Status Tensor::ExpandDim(const dsize_t &axis) { std::vector Tensor::Strides() const { std::vector strides = shape_.Strides(); uint8_t size = type_.SizeInBytes(); - std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; }); + (void)std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; }); return strides; } #ifdef ENABLE_PYTHON Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) { RETURN_UNEXPECTED_IF_NULL(t); + RETURN_UNEXPECTED_IF_NULL(out); CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings."); std::string format_desc = t->type().GetPybindFormat(); @@ -622,6 +648,7 @@ Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) { t->Rank(), /* Number of dimensions */ t->shape().AsVector(), /* Buffer dimensions */ t->Strides()); + RETURN_UNEXPECTED_IF_NULL(out); return Status::OK(); } #endif @@ -721,6 +748,7 @@ Status Tensor::from_json_convert(nlohmann::json json_data, TensorShape shape, st template Status Tensor::GetItemAt(T *o, const std::vector &index) const { + RETURN_UNEXPECTED_IF_NULL(o); if (data_ == nullptr) { RETURN_STATUS_UNEXPECTED("Data is not allocated yet"); } @@ -794,6 +822,7 @@ Status Tensor::GetDataAsNumpy(py::array *data) { return Status::OK(); } Status Tensor::GetDataAsNumpyStrings(py::array *data) { + RETURN_UNEXPECTED_IF_NULL(data); auto itr = begin(); uint64_t max_value = 0; for (; itr != end(); ++itr) { @@ -807,7 +836,9 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) { max_value = (max_value == 0 ? 
1 : max_value); uint64_t total_size = shape_.NumOfElements() * max_value; char *tmp_data = reinterpret_cast(data_allocator_->allocate(total_size)); - if (tmp_data == nullptr) RETURN_STATUS_UNEXPECTED("Cannot create temp array."); + if (tmp_data == nullptr) { + RETURN_STATUS_UNEXPECTED("Cannot create temp array."); + } int ret_code = memset_s(tmp_data, total_size, 0, total_size); CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to initialize temp memory"); @@ -820,9 +851,10 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) { } } auto strides = shape_.Strides(); - std::transform(strides.begin(), strides.end(), strides.begin(), - [&max_value](const auto &s) { return s * max_value; }); + (void)std::transform(strides.begin(), strides.end(), strides.begin(), + [&max_value](const auto &s) { return s * max_value; }); *data = py::array(py::dtype("S" + std::to_string(max_value)), shape_.AsVector(), strides, tmp_data); + RETURN_UNEXPECTED_IF_NULL(data); data_allocator_->deallocate(reinterpret_cast(tmp_data)); return Status::OK(); } @@ -832,6 +864,7 @@ void Tensor::Squeeze() { shape_ = shape_.Squeeze(); } template Status Tensor::GetUnsignedIntAt(T *o, const std::vector &index) const { + RETURN_UNEXPECTED_IF_NULL(o); if (data_ == nullptr) { RETURN_STATUS_UNEXPECTED("Data is not allocated yet"); } @@ -873,6 +906,7 @@ Status Tensor::GetUnsignedIntAt(T *o, const std::vector &index) const { template Status Tensor::GetSignedIntAt(T *o, const std::vector &index) const { + RETURN_UNEXPECTED_IF_NULL(o); if (data_ == nullptr) { RETURN_STATUS_UNEXPECTED("Data is not allocated yet"); } @@ -914,6 +948,7 @@ Status Tensor::GetSignedIntAt(T *o, const std::vector &index) const { template Status Tensor::GetFloatAt(T *o, const std::vector &index) const { + RETURN_UNEXPECTED_IF_NULL(o); if (data_ == nullptr) { RETURN_STATUS_UNEXPECTED("Data is not allocated yet"); } @@ -958,6 +993,7 @@ Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length return Status::OK(); } Status Tensor::CopyLastDimAt(const std::shared_ptr &src, const std::vector &index) { + RETURN_UNEXPECTED_IF_NULL(src); CHECK_FAIL_RETURN_UNEXPECTED(src->type() == type_, "Source Tensor has a different type"); CHECK_FAIL_RETURN_UNEXPECTED(index.back() == 0, "Last dim in index should be 0"); @@ -975,6 +1011,7 @@ Status Tensor::CopyLastDimAt(const std::shared_ptr &src, const std::vect Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &slice_index, SliceOption *slice_option_ptr) { + RETURN_UNEXPECTED_IF_NULL(slice_option_ptr); if (slice_option.indices_.empty() && !slice_option.slice_.valid()) { RETURN_STATUS_UNEXPECTED("Both indices and slices can not be empty."); } @@ -983,6 +1020,7 @@ Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &sl RETURN_STATUS_UNEXPECTED("Both indices and slices can not be given."); } + CHECK_FAIL_RETURN_UNEXPECTED(shape_.Size() > slice_index, "Invalid shape, should be greater than slice index."); // if slice object was provided, indices should be empty. Generate indices from the slice object.
if (slice_option.indices_.empty()) { // check if slice is valid @@ -1010,6 +1048,7 @@ Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &sl } Status Tensor::Slice(std::shared_ptr *out, const std::vector slice_options_) { + RETURN_UNEXPECTED_IF_NULL(out); std::vector converted_slice_objects; CHECK_FAIL_RETURN_UNEXPECTED(slice_options_.size() <= static_cast(std::numeric_limits::max()), @@ -1046,7 +1085,7 @@ Status Tensor::Slice(std::shared_ptr *out, const std::vector *out, const std::vector *out, const std::vector> &indices, const TensorShape &shape) { + RETURN_UNEXPECTED_IF_NULL(out); RETURN_IF_NOT_OK(CreateEmpty(shape, type_, out)); + RETURN_UNEXPECTED_IF_NULL(out); (*out)->GetMutableBuffer(); dsize_t out_index = 0; std::vector dim_length = shape_.AsVector(); @@ -1131,6 +1172,7 @@ Status Tensor::SliceNumeric(std::shared_ptr *out, const std::vector *out, const std::vector> &indices, const TensorShape &shape) { + RETURN_UNEXPECTED_IF_NULL(out); std::vector dim_length = shape_.AsVector(); std::vector strings; diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor.h b/mindspore/ccsrc/minddata/dataset/core/tensor.h index cc011232fde..3c6833049a8 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor.h +++ b/mindspore/ccsrc/minddata/dataset/core/tensor.h @@ -414,6 +414,10 @@ class Tensor { /// \param[in] index_vector vector of indices /// \return std::vector modified vector of indices static inline std::vector HandleNegIndices(std::vector index_vector, std::vector length) { + if (length.size() < index_vector.size()) { + MS_LOG(ERROR) << "The size of length should be greater than the shape of index_vector"; + return {}; + } std::vector indices(index_vector.size(), 0); for (int i = 0; i < index_vector.size(); i++) { indices[i] = HandleNeg(index_vector[i], length[i]); @@ -780,12 +784,14 @@ inline Tensor::TensorIterator Tensor::end() template <> inline Status Tensor::CreateFromVector(const std::vector &items, const TensorShape &shape, TensorPtr *out) { + RETURN_UNEXPECTED_IF_NULL(out); CHECK_FAIL_RETURN_UNEXPECTED( items.size() == shape.NumOfElements(), "Number of elements in the vector does not match the number of elements of the shape required"); const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator(); *out = std::allocate_shared(*alloc, TensorShape({static_cast(items.size())}), DataType(DataType::DE_STRING)); + CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed."); if (items.size() == 0) { if (shape.known()) { return (*out)->Reshape(shape); @@ -835,6 +841,7 @@ inline Status Tensor::CreateFromVector(const std::vector inline Status Tensor::CreateScalar(const std::string &item, TensorPtr *out) { + RETURN_UNEXPECTED_IF_NULL(out); return CreateFromVector({item}, TensorShape::CreateScalar(), out); } } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.cc b/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.cc index 5965ff6fdf5..c358e24dd1d 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_helpers.cc @@ -16,6 +16,8 @@ #include #include #include "minddata/dataset/core/tensor_helpers.h" +#include "minddata/dataset/util/log_adapter.h" +#include "minddata/dataset/util/status.h" namespace mindspore { namespace dataset { @@ -23,6 +25,10 @@ namespace dataset { void IndexGeneratorHelper(int8_t depth, std::vector *numbers, const std::vector &slice_list, std::vector> *matrix) { + if (numbers == nullptr || matrix == nullptr) { + 
MS_LOG(ERROR) << "Invalid input pointer, can't be NULL"; + return; + } // for loop changes if its an index instead of a slice object if (depth > 0) { int8_t new_depth = depth - 1; diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor_row.h b/mindspore/ccsrc/minddata/dataset/core/tensor_row.h index 2b8291135e5..b2e88df125a 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor_row.h +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_row.h @@ -87,6 +87,7 @@ class TensorRow { /// \param[out] output TensorRow template static Status ConvertToTensorRow(const std::vector &o, TensorRow *output) { + RETURN_UNEXPECTED_IF_NULL(output); DataType data_type = DataType::FromCType(); if (data_type == DataType::DE_UNKNOWN) { RETURN_STATUS_UNEXPECTED("ConvertToTensorRow: Data type was not recognized."); @@ -106,6 +107,7 @@ class TensorRow { /// \param[out] output TensorRow template static Status ConvertToTensorRow(const T &o, TensorRow *output) { + RETURN_UNEXPECTED_IF_NULL(output); DataType data_type = DataType::FromCType(); if (data_type == DataType::DE_UNKNOWN) { RETURN_STATUS_UNEXPECTED("ConvertToTensorRow: Data type was not recognized."); @@ -125,6 +127,7 @@ class TensorRow { /// \param[out] o the primitive variable template static Status ConvertFromTensorRow(const TensorRow &input, T *o) { + RETURN_UNEXPECTED_IF_NULL(o); DataType data_type = DataType::FromCType(); RETURN_IF_NOT_OK(ValidateTensorRow(input, data_type)); if (input.at(0)->type() != data_type) { @@ -142,6 +145,7 @@ class TensorRow { /// \param[out] o vector of primitive variable template static Status ConvertFromTensorRow(const TensorRow &input, std::vector *o) { + RETURN_UNEXPECTED_IF_NULL(o); DataType data_type = DataType::FromCType(); RETURN_IF_NOT_OK(ValidateTensorRow(input, data_type)); if (input.at(0)->Rank() != 1) diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc b/mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc index 30fdd7dedff..37c9016df04 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor_shape.cc @@ -40,7 +40,7 @@ bool multi_ok(dsize_t x, dsize_t y) { } dsize_t TensorShape::NumOfElements() const { - if (!known()) { + if (!known() && strides_.size() < 1) { return 0; } return strides_[0]; @@ -216,12 +216,9 @@ py::list TensorShape::AsPyList() { #endif TensorShape TensorShape::Squeeze() const { - std::vector new_shape; - for (auto s : AsVector()) { - if (s != 1) { - new_shape.push_back(s); - } - } + std::vector new_shape(raw_shape_.size()); + auto it = std::copy_if(raw_shape_.begin(), raw_shape_.end(), new_shape.begin(), [](auto s) { return s != 1; }); + new_shape.resize(std::distance(new_shape.begin(), it)); return TensorShape(new_shape); } @@ -230,6 +227,7 @@ std::vector TensorShape::Strides() const { return std::vector{ // Name: ToFlatIndex() // Description: convert a vector style index to number, used to access memory internal use only Status TensorShape::ToFlatIndex(const std::vector &index, dsize_t *flat_index) const { + RETURN_UNEXPECTED_IF_NULL(flat_index); if (index.size() != raw_shape_.size()) { std::stringstream ss; ss << "Index size (" << index.size() << ") does not match the shape size (" << raw_shape_.size() << ")."; diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_hw.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_hw.cc index b5fc586267c..11e28b55f96 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_hw.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_hw.cc @@ -101,8 
+101,8 @@ Status CacheServerHW::GetNumaNodeInfo() { }; // Look for name starts with 'node' and followed by digits. const char kNodeName[] = "node"; - while (it->hasNext()) { - auto p = it->next(); + while (it->HasNext()) { + auto p = it->Next(); const std::string entry = p.Basename(); const char *name = entry.data(); if (strncmp(name, kNodeName, strlen(kNodeName)) == 0 && isdigit_string(name + strlen(kNodeName))) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_pool.cc b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_pool.cc index 8e2b591407b..26a704a04a1 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/cache/cache_pool.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/cache/cache_pool.cc @@ -63,8 +63,8 @@ Status CachePool::DoServiceStop() { if (!root_.toString().empty()) { Path spill = GetSpillPath(); auto it = Path::DirIterator::OpenDirectory(&spill); - while (it->hasNext()) { - rc = it->next().Remove(); + while (it->HasNext()) { + rc = it->Next().Remove(); if (rc.IsError() && rc2.IsOk()) { rc2 = rc; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/consumers/pull_based_tree_consumer.cc b/mindspore/ccsrc/minddata/dataset/engine/consumers/pull_based_tree_consumer.cc index ac5fa282c67..2275badfda5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/consumers/pull_based_tree_consumer.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/pull_based_tree_consumer.cc @@ -24,6 +24,7 @@ namespace mindspore::dataset { PullBasedIteratorConsumer::PullBasedIteratorConsumer() { tree_adapter_lite_ = std::make_unique(); } Status PullBasedIteratorConsumer::Init(std::shared_ptr root) { + RETURN_UNEXPECTED_IF_NULL(root); return tree_adapter_lite_->BuildTree(std::move(root)); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.cc b/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.cc index b59bcbf9467..a3bcc94acff 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/python_tree_consumer.cc @@ -20,6 +20,7 @@ namespace mindspore::dataset { Status PythonIteratorConsumer::GetNextAsList(py::list *out) { + RETURN_UNEXPECTED_IF_NULL(out); std::vector row; { py::gil_scoped_release gil_release; @@ -32,6 +33,7 @@ Status PythonIteratorConsumer::GetNextAsList(py::list *out) { } Status PythonIteratorConsumer::GetNextAsDict(py::dict *out) { + RETURN_UNEXPECTED_IF_NULL(out); std::vector>> vec; Status s; { @@ -64,6 +66,8 @@ Status PythonTreeGetters::GetRow(TensorRow *const r) { return TreeGetters::GetRow(r); } Status PythonDatasetSizeGetter::GetRow(const std::shared_ptr &tree_adapter, TensorRow *r) { + RETURN_UNEXPECTED_IF_NULL(tree_adapter); + RETURN_UNEXPECTED_IF_NULL(r); py::gil_scoped_release gil_release; return DatasetSizeGetter::GetRow(tree_adapter, r); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc index c99ffdaf733..01ae379c2ab 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/consumers/tree_consumer.cc @@ -13,7 +13,6 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ - #include #include #include @@ -179,6 +178,8 @@ Status ToDevice::Stop() { } Status ToDevice::GetDataInfo(std::vector *const types, std::vector *const shapes) { + RETURN_UNEXPECTED_IF_NULL(types); + RETURN_UNEXPECTED_IF_NULL(shapes); // tree_.root() must be DeviceQueueOp std::shared_ptr root = std::shared_ptr(tree_adapter_->GetRoot()); CHECK_FAIL_RETURN_UNEXPECTED(root != nullptr, "Root is a nullptr."); @@ -218,8 +219,13 @@ Status SaveToDisk::ValidateParams() { MS_LOG(ERROR) << err; RETURN_STATUS_SYNTAX_ERROR(err); } - auto parent_path = dir.ParentPath(); - if (!parent_path.empty() && access(common::SafeCStr(parent_path), R_OK) == -1) { + std::string real_path; + if (Path::RealPath(dir.ParentPath(), real_path).IsError()) { + std::string err_msg = "CreateSaver failed, can not get real dataset path: " + dir.ParentPath(); + MS_LOG(ERROR) << err_msg; + RETURN_STATUS_SYNTAX_ERROR(err_msg); + } + if (access(dir.ParentPath().c_str(), R_OK) == -1) { std::string err_msg = "CreateSaver failed, no access to specified dataset path: " + dataset_path_; MS_LOG(ERROR) << err_msg; RETURN_STATUS_SYNTAX_ERROR(err_msg); @@ -250,15 +256,15 @@ Status SaveToDisk::Save() { auto mr_header = std::make_shared(); auto mr_writer = std::make_unique(); std::vector blob_fields; - if (mindrecord::SUCCESS != mindrecord::ShardWriter::initialize(&mr_writer, file_names)) { + if (mindrecord::SUCCESS != mindrecord::ShardWriter::Initialize(&mr_writer, file_names)) { RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter, please check above `ERROR` level message."); } std::unordered_map column_name_id_map; for (auto el : tree_adapter_->GetColumnNameMap()) { std::string column_name = el.first; - std::transform(column_name.begin(), column_name.end(), column_name.begin(), - [](unsigned char c) { return ispunct(c) ? '_' : c; }); + (void)std::transform(column_name.begin(), column_name.end(), column_name.begin(), + [](unsigned char c) { return ispunct(c) ? 
'_' : c; }); column_name_id_map[column_name] = el.second; } @@ -281,17 +287,21 @@ Status SaveToDisk::Save() { RETURN_IF_NOT_OK(FetchMetaFromTensorRow(column_name_id_map, row, &mr_json, &index_fields)); MS_LOG(INFO) << "Schema of saved mindrecord: " << mr_json.dump(); if (mindrecord::SUCCESS != - mindrecord::ShardHeader::initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) { + mindrecord::ShardHeader::Initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) { RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardHeader."); } - mr_writer->SetShardHeader(mr_header); + if (mindrecord::SUCCESS != mr_writer->SetShardHeader(mr_header)) { + RETURN_STATUS_UNEXPECTED("Error: failed to set header of ShardWriter."); + } first_loop = false; } // construct data if (!row.empty()) { // write data RETURN_IF_NOT_OK(FetchDataFromTensorRow(row, column_name_id_map, &row_raw_data, &row_bin_data)); std::shared_ptr> output_bin_data; - mr_writer->MergeBlobData(blob_fields, row_bin_data, &output_bin_data); + if (mindrecord::SUCCESS != mr_writer->MergeBlobData(blob_fields, row_bin_data, &output_bin_data)) { + RETURN_STATUS_UNEXPECTED("Error: failed to merge blob data of ShardWriter."); + } std::map> raw_data; raw_data.insert( std::pair>(mr_schema_id, std::vector{row_raw_data})); @@ -299,12 +309,16 @@ Status SaveToDisk::Save() { if (output_bin_data != nullptr) { bin_data.emplace_back(*output_bin_data); } - mr_writer->WriteRawData(raw_data, bin_data); + if (mindrecord::SUCCESS != mr_writer->WriteRawData(raw_data, bin_data)) { + RETURN_STATUS_UNEXPECTED("Error: failed to write raw data to ShardWriter."); + } } } while (!row.empty()); - mr_writer->Commit(); - if (mindrecord::SUCCESS != mindrecord::ShardIndexGenerator::finalize(file_names)) { + if (mindrecord::SUCCESS != mr_writer->Commit()) { + RETURN_STATUS_UNEXPECTED("Error: failed to commit ShardWriter."); + } + if (mindrecord::SUCCESS != mindrecord::ShardIndexGenerator::Finalize(file_names)) { RETURN_STATUS_UNEXPECTED("Error: failed to finalize ShardIndexGenerator."); } return Status::OK(); @@ -407,7 +421,7 @@ Status SaveToDisk::FetchMetaFromTensorRow(const std::unordered_map>> *row_bin_data, const std::unordered_map &column_name_id_map) { if (row_raw_data == nullptr) { @@ -424,6 +438,8 @@ static Status ValidateInputParams(nlohmann::json *row_raw_data, Status SaveToDisk::FetchFloatData(std::shared_ptr tensor, std::string column_name, nlohmann::json *row_raw_data, std::unique_ptr> *data_ptr) { + RETURN_UNEXPECTED_IF_NULL(row_raw_data); + RETURN_UNEXPECTED_IF_NULL(data_ptr); auto column_type = tensor->type(); Status s; if (column_type == DataType::DE_FLOAT32) { @@ -442,6 +458,9 @@ Status SaveToDisk::FetchFloatData(std::shared_ptr tensor, std::string co Status SaveToDisk::FetchItemData(std::shared_ptr tensor, std::string column_name, nlohmann::json *row_raw_data, std::map>> *row_bin_data) { + RETURN_UNEXPECTED_IF_NULL(tensor); + RETURN_UNEXPECTED_IF_NULL(row_raw_data); + RETURN_UNEXPECTED_IF_NULL(row_bin_data); auto column_type = tensor->type(); Status s; std::unique_ptr> data_ptr; @@ -492,7 +511,6 @@ Status SaveToDisk::FetchItemData(std::shared_ptr tensor, std::string col RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {})); // assume scalar string tensor std::string ss(sv); (*row_raw_data)[column_name] = std::move(ss); - return Status::OK(); } else { RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data."); } @@ -506,6 +524,8 @@ Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row, const std::unordered_map 
&column_name_id_map, nlohmann::json *row_raw_data, std::map>> *row_bin_data) { + RETURN_UNEXPECTED_IF_NULL(row_raw_data); + RETURN_UNEXPECTED_IF_NULL(row_bin_data); Status s; s = ValidateInputParams(row_raw_data, row_bin_data, column_name_id_map); if (s.IsError()) { @@ -525,9 +545,11 @@ template Status SaveToDisk::TransformTensor(const unsigned char *src, const TensorShape &shape, const int64_t num_of_elements, std::unique_ptr *data, std::unique_ptr> *data_ptr, std::unique_ptr *s, bool need_convert) { - if (nullptr == src) { - RETURN_STATUS_UNEXPECTED("Error: buffer of Tensor is NULL."); - } + RETURN_UNEXPECTED_IF_NULL(src); + RETURN_UNEXPECTED_IF_NULL(data); + RETURN_UNEXPECTED_IF_NULL(data_ptr); + RETURN_UNEXPECTED_IF_NULL(s); + *data_ptr = std::make_unique>(num_of_elements * sizeof(T)); if (need_convert) { auto tmp_ptr = std::make_unique>(num_of_elements * sizeof(S)); @@ -560,25 +582,32 @@ TreeGetters::TreeGetters() : dataset_size_(-1), init_flag_(false), first_row_obt } Status TreeGetters::Init(std::shared_ptr d) { + RETURN_UNEXPECTED_IF_NULL(d); root_ = std::move(d); return Status::OK(); } -Status TreeGetters::GetRow(TensorRow *row) { return tree_adapter_->GetNext(row); } +Status TreeGetters::GetRow(TensorRow *row) { + RETURN_UNEXPECTED_IF_NULL(row); + return tree_adapter_->GetNext(row); +} Status TreeGetters::GetOutputTypes(std::vector *types) { + RETURN_UNEXPECTED_IF_NULL(types); RETURN_IF_NOT_OK(GetFirstRowShapeAndType()); *types = first_row_type_; return Status::OK(); } Status TreeGetters::GetOutputShapes(std::vector *shapes) { + RETURN_UNEXPECTED_IF_NULL(shapes); RETURN_IF_NOT_OK(GetFirstRowShapeAndType()); *shapes = first_row_shape_; return Status::OK(); } Status TreeGetters::GetBatchSize(int64_t *batch_size) { + RETURN_UNEXPECTED_IF_NULL(batch_size); RETURN_IF_NOT_OK(InternalInit()); std::shared_ptr root = std::shared_ptr(tree_adapter_->GetRoot()); RETURN_UNEXPECTED_IF_NULL(root); @@ -588,6 +617,7 @@ Status TreeGetters::GetBatchSize(int64_t *batch_size) { } Status TreeGetters::GetRepeatCount(int64_t *repeat_count) { + RETURN_UNEXPECTED_IF_NULL(repeat_count); RETURN_IF_NOT_OK(InternalInit()); std::shared_ptr root = std::shared_ptr(tree_adapter_->GetRoot()); RETURN_UNEXPECTED_IF_NULL(root); @@ -596,6 +626,7 @@ Status TreeGetters::GetRepeatCount(int64_t *repeat_count) { } Status TreeGetters::GetNumClasses(int64_t *num_classes) { + RETURN_UNEXPECTED_IF_NULL(num_classes); RETURN_IF_NOT_OK(InternalInit()); std::shared_ptr root = std::shared_ptr(tree_adapter_->GetRoot()); RETURN_UNEXPECTED_IF_NULL(root); @@ -604,6 +635,7 @@ Status TreeGetters::GetNumClasses(int64_t *num_classes) { } Status TreeGetters::GetColumnNames(std::vector *output) { + RETURN_UNEXPECTED_IF_NULL(output); RETURN_IF_NOT_OK(InternalInit()); std::shared_ptr root = std::shared_ptr(tree_adapter_->GetRoot()); RETURN_UNEXPECTED_IF_NULL(root); @@ -620,6 +652,7 @@ Status TreeGetters::GetColumnNames(std::vector *output) { } Status TreeGetters::GetClassIndexing(std::vector>> *output_class_indexing) { + RETURN_UNEXPECTED_IF_NULL(output_class_indexing); RETURN_IF_NOT_OK(InternalInit()); std::shared_ptr root = std::shared_ptr(tree_adapter_->GetRoot()); RETURN_UNEXPECTED_IF_NULL(root); @@ -671,6 +704,7 @@ Status DatasetSizeGetter::Init(std::shared_ptr d) { return Status::OK(); } Status DatasetSizeGetter::DryRun(std::shared_ptr ir_node, int64_t *dataset_size) { + RETURN_UNEXPECTED_IF_NULL(dataset_size); std::shared_ptr tree_adapter = std::make_shared(TreeAdapter::UsageFlag::kDeGetter); tree_adapters_.push_back(tree_adapter); 
RETURN_IF_NOT_OK(tree_adapter->Compile(ir_node, 1)); @@ -685,6 +719,7 @@ Status DatasetSizeGetter::DryRun(std::shared_ptr ir_node, int64_t * return Status::OK(); } Status DatasetSizeGetter::GetRow(const std::shared_ptr &tree_adapter, TensorRow *row) { + RETURN_UNEXPECTED_IF_NULL(row); return tree_adapter->GetNext(row); } Status DatasetSizeGetter::Terminate() { diff --git a/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc index 03d7321409c..2990a2f5fe0 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/data_schema.cc @@ -73,7 +73,7 @@ ColDescriptor::ColDescriptor(const std::string &col_name, DataType col_type, Ten ColDescriptor::ColDescriptor(const ColDescriptor &in_cd) : type_(in_cd.type_), rank_(in_cd.rank_), tensor_impl_(in_cd.tensor_impl_), col_name_(in_cd.col_name_) { // If it has a tensor shape, make a copy of it with our own unique_ptr. - tensor_shape_ = in_cd.hasShape() ? std::make_unique(in_cd.shape()) : nullptr; + tensor_shape_ = in_cd.HasShape() ? std::make_unique(in_cd.Shape()) : nullptr; } // Assignment overload @@ -84,7 +84,7 @@ ColDescriptor &ColDescriptor::operator=(const ColDescriptor &in_cd) { tensor_impl_ = in_cd.tensor_impl_; col_name_ = in_cd.col_name_; // If it has a tensor shape, make a copy of it with our own unique_ptr. - tensor_shape_ = in_cd.hasShape() ? std::make_unique(in_cd.shape()) : nullptr; + tensor_shape_ = in_cd.HasShape() ? std::make_unique(in_cd.Shape()) : nullptr; } return *this; } @@ -113,7 +113,7 @@ Status ColDescriptor::MaterializeTensorShape(int32_t num_elements, TensorShape * // If the shape is not given in this column, then we assume the shape will be: {numElements} if (tensor_shape_ == nullptr) { - if (this->rank() == 0 && num_elements == 1) { + if (this->Rank() == 0 && num_elements == 1) { *out_shape = TensorShape::CreateScalar(); return Status::OK(); } @@ -173,7 +173,7 @@ Status ColDescriptor::MaterializeTensorShape(int32_t num_elements, TensorShape * } // getter function for the shape -TensorShape ColDescriptor::shape() const { +TensorShape ColDescriptor::Shape() const { if (tensor_shape_ != nullptr) { return *tensor_shape_; // copy construct a shape to return } else { @@ -257,7 +257,7 @@ Status DataSchema::ColumnOrderLoad(nlohmann::json column_tree, const std::vector } // Internal helper function for parsing shape info and building a vector for the shape construction. -static Status buildShape(const nlohmann::json &shapeVal, std::vector *outShape) { +static Status BuildShape(const nlohmann::json &shapeVal, std::vector *outShape) { if (outShape == nullptr) { RETURN_STATUS_UNEXPECTED("null output shape"); } @@ -274,7 +274,8 @@ static Status buildShape(const nlohmann::json &shapeVal, std::vector *o Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::string &col_name) { int32_t rank_value = -1; TensorImpl t_impl_value = TensorImpl::kFlexible; - std::string name, type_str; + std::string name = ""; + std::string type_str = ""; std::vector tmp_shape = {}; bool shape_field_exists = false; // Iterate over this column's attributes. 
@@ -291,7 +292,7 @@ Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::strin STR_TO_TENSORIMPL(it_child.value(), t_impl_value); } else if (it_child.key() == "shape") { shape_field_exists = true; - RETURN_IF_NOT_OK(buildShape(it_child.value(), &tmp_shape)); + RETURN_IF_NOT_OK(BuildShape(it_child.value(), &tmp_shape)); } else { std::string err_msg = "Unexpected column attribute " + it_child.key() + " for column " + col_name; RETURN_STATUS_UNEXPECTED(err_msg); @@ -324,10 +325,10 @@ Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::strin // Create the column descriptor for this column from the data we pulled from the json file TensorShape col_shape = TensorShape(tmp_shape); if (shape_field_exists) - (void)this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value, &col_shape)); + RETURN_IF_NOT_OK(this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value, &col_shape))); else // Create a column descriptor that doesn't have a shape - (void)this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value)); + RETURN_IF_NOT_OK(this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value))); return Status::OK(); } @@ -345,19 +346,30 @@ Status DataSchema::LoadSchemaFile(const std::string &schema_file_path, } catch (nlohmann::json::out_of_range &e) { num_rows_ = 0; } catch (nlohmann::json::exception &e) { + in.close(); RETURN_STATUS_UNEXPECTED("Unable to parse \"numRows\" from schema"); } nlohmann::json column_tree = js.at("columns"); if (column_tree.empty()) { + in.close(); RETURN_STATUS_UNEXPECTED("columns is null"); } if (columns_to_load.empty()) { // Parse the json tree and load the schema's columns in whatever order that the json // layout decides - RETURN_IF_NOT_OK(this->AnyOrderLoad(column_tree)); + Status rc = this->AnyOrderLoad(column_tree); + if (rc.IsError()) { + in.close(); + return rc; + } } else { - RETURN_IF_NOT_OK(this->ColumnOrderLoad(column_tree, columns_to_load)); + Status rc = this->ColumnOrderLoad(column_tree, columns_to_load); + if (rc.IsError()) { + in.close(); + return rc; + } } + in.close(); } catch (const std::exception &err) { // Catch any exception and convert to Status return code RETURN_STATUS_UNEXPECTED("Schema file failed to load with JSON tools. 
File is: " + schema_file_path); @@ -394,7 +406,7 @@ Status DataSchema::LoadSchemaString(const std::string &schema_json_string, DataSchema::~DataSchema() = default; // Getter for the ColDescriptor by index -const ColDescriptor &DataSchema::column(int32_t idx) const { +const ColDescriptor &DataSchema::Column(int32_t idx) const { MS_ASSERT(idx < static_cast(col_descs_.size())); return col_descs_[idx]; } @@ -411,9 +423,9 @@ void DataSchema::Print(std::ostream &out) const { Status DataSchema::AddColumn(const ColDescriptor &cd) { // Sanity check there's not a duplicate name before adding the column for (auto i = 0; i < col_descs_.size(); ++i) { - if (col_descs_[i].name() == cd.name()) { + if (col_descs_[i].Name() == cd.Name()) { std::ostringstream ss; - ss << "column name '" << cd.name() << "' already exists in schema."; + ss << "column name '" << cd.Name() << "' already exists in schema."; std::string err_msg = ss.str(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -439,11 +451,11 @@ Status DataSchema::GetColumnNameMap(std::unordered_map *ou } for (size_t i = 0; i < col_descs_.size(); ++i) { - if (col_descs_[i].name().empty()) { + if (col_descs_[i].Name().empty()) { return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Constructing column name map from schema, but found empty column name."); } - (*out_column_name_map)[col_descs_[i].name()] = i; + (*out_column_name_map)[col_descs_[i].Name()] = i; } return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/data_schema.h b/mindspore/ccsrc/minddata/dataset/engine/data_schema.h index d9f556c22ac..a92f64a3855 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/data_schema.h +++ b/mindspore/ccsrc/minddata/dataset/engine/data_schema.h @@ -81,27 +81,27 @@ class ColDescriptor { /// \brief getter function /// \return The column's DataType - DataType type() const { return type_; } + DataType Type() const { return type_; } /// \brief getter function /// \return The column's rank - int32_t rank() const { return rank_; } + int32_t Rank() const { return rank_; } /// \brief getter function /// \return The column's name - std::string name() const { return col_name_; } + std::string Name() const { return col_name_; } /// \brief getter function /// \return The column's shape - TensorShape shape() const; + TensorShape Shape() const; /// \brief getter function /// \return TF if the column has an assigned fixed shape. 
- bool hasShape() const { return tensor_shape_ != nullptr; } + bool HasShape() const { return tensor_shape_ != nullptr; } /// \brief getter function /// \return The column's tensor implementation type - TensorImpl tensorImpl() const { return tensor_impl_; } + TensorImpl GetTensorImpl() const { return tensor_impl_; } private: DataType type_; // The columns type @@ -153,7 +153,7 @@ class DataSchema { /// \brief getter /// \return The reference to a ColDescriptor to get (const version) - const ColDescriptor &column(int32_t idx) const; + const ColDescriptor &Column(int32_t idx) const; /// \brief getter /// \return The number of columns in the schema @@ -163,7 +163,7 @@ class DataSchema { /// \brief getter /// \return The number of rows read from schema - int64_t num_rows() const { return num_rows_; } + int64_t NumRows() const { return num_rows_; } static const char DEFAULT_DATA_SCHEMA_FILENAME[]; diff --git a/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc index 2b722a0d0e3..86024e94698 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.cc @@ -14,6 +14,7 @@ * limitations under the License. */ #include "minddata/dataset/engine/dataset_iterator.h" +#include #include #include #include "minddata/dataset/core/data_type.h" diff --git a/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h index 54b0768b198..e2d75efd1c0 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h +++ b/mindspore/ccsrc/minddata/dataset/engine/dataset_iterator.h @@ -49,7 +49,7 @@ class DatasetIterator { // @return The string to column id mapping. std::unordered_map GetColumnNameMap() const; - bool eof_handled() const { return eof_handled_; } + bool EofHandled() const { return eof_handled_; } // Fetches one row of data from the iterator. // the base class version simply performs error handling and returns empty row. Actual @@ -108,11 +108,11 @@ class ChildIterator { std::unordered_map GetColumnNameMap() const; // Return T/F if end of epoch - bool end_of_epoch() { return end_epoch_; } + bool EndOfEpoch() { return end_epoch_; } // Getter // @return T/F if this iterator is completely done after getting an eof - bool eof_handled() const { return eof_handled_; } + bool EofHandled() const { return eof_handled_; } private: DatasetOp *current_op_; // The parent operator. We consume from it's children. diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc index df47d471350..ee7c1185b73 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/barrier_op.cc @@ -113,6 +113,7 @@ Status BarrierOp::blockCond() { // fetches next Barrier row Status BarrierOp::getNextTensorRow(TensorRow *new_row) { + RETURN_UNEXPECTED_IF_NULL(new_row); // iterate over all iterators and generate a row RETURN_IF_NOT_OK((child_iterator_)->FetchNextTensorRow(new_row)); // add each new row to iterator, check if row is empty, if row from iterator is empty return empty row @@ -122,7 +123,7 @@ Status BarrierOp::getNextTensorRow(TensorRow *new_row) { MS_LOG(INFO) << "Barrier operator child iterator produced empty row."; clean_up_ = true; // If we picked up an eof here, then we are completely done. 
- if ((child_iterator_)->eof_handled()) { + if ((child_iterator_)->EofHandled()) { MS_LOG(INFO) << "Barrier operator iterator got EOF."; eof_ = true; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc index f6614995b88..5b045c0ecfc 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc @@ -36,6 +36,7 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa } Status BatchOp::Builder::Build(std::shared_ptr *ptr) { + RETURN_UNEXPECTED_IF_NULL(ptr); #ifdef ENABLE_PYTHON *ptr = std::make_shared(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_, builder_num_workers_, builder_in_names_, builder_out_names_, @@ -106,7 +107,7 @@ Status BatchOp::operator()() { RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); int32_t cur_batch_size = 0; RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(0, 0, 0))); - while (child_iterator_->eof_handled() == false) { + while (child_iterator_->EofHandled() == false) { while (new_row.empty() == false) { table->emplace_back(new_row); // if # of rows is enough to make 1 batch, send it to worker_queue @@ -142,7 +143,7 @@ Status BatchOp::operator()() { << "reduce memory usage."; } #endif - } // end of eof_handled() == false + } // end of EofHandled() == false RETURN_IF_NOT_OK( worker_queues_[cnt++ % num_workers_]->EmplaceBack(std::make_pair(nullptr, CBatchInfo(batchCtrl::kEOF)))); // EOF received, send quit signal to all workers @@ -168,6 +169,8 @@ void BatchOp::Print(std::ostream &out, bool show_all) const { } Status BatchOp::BatchRows(const std::unique_ptr *src, TensorRow *dest, dsize_t batch_size) { + RETURN_UNEXPECTED_IF_NULL(src); + RETURN_UNEXPECTED_IF_NULL(dest); if ((*src)->size() != batch_size) { RETURN_STATUS_UNEXPECTED("[Internal ERROR] Source table size does not match the batch_size."); } @@ -274,6 +277,8 @@ Status BatchOp::EoeReceived(int32_t) { #ifdef ENABLE_PYTHON Status BatchOp::MapColumns(std::pair, CBatchInfo> *table_pair) { + RETURN_UNEXPECTED_IF_NULL(table_pair); + RETURN_UNEXPECTED_IF_NULL(table_pair->first); std::unique_ptr in_q_table = std::move(table_pair->first); size_t num_rows = in_q_table->size(); auto out_q_table = std::make_unique(num_rows, TensorRow(column_name_id_map_.size(), nullptr)); @@ -316,6 +321,7 @@ Status BatchOp::MapColumns(std::pair, CBatchInfo> #endif Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) { + RETURN_UNEXPECTED_IF_NULL(batch_size); #ifdef ENABLE_PYTHON if (batch_size_func_) { RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info)); @@ -330,6 +336,7 @@ Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) { #ifdef ENABLE_PYTHON Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { + RETURN_UNEXPECTED_IF_NULL(batch_size); { // Acquire Python GIL py::gil_scoped_acquire gil_acquire; @@ -355,6 +362,8 @@ Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) { } Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info) { + RETURN_UNEXPECTED_IF_NULL(input); + RETURN_UNEXPECTED_IF_NULL(output); { // Acquire Python GIL py::gil_scoped_acquire gil_acquire; @@ -471,6 +480,9 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info, const std::unordered_map &column_name_id_map, std::set *pad_cols, std::vector> *pad_vals, std::vector> *pad_shapes) { + 
RETURN_UNEXPECTED_IF_NULL(pad_cols); + RETURN_UNEXPECTED_IF_NULL(pad_vals); + RETURN_UNEXPECTED_IF_NULL(pad_shapes); if (pad_info.empty()) { // if pad_info empty, pad every columns automatically for (size_t col_id = 0; col_id < column_name_id_map.size(); col_id++) { pad_cols->insert(col_id); @@ -561,6 +573,7 @@ int64_t BatchOp::GetTreeBatchSize() { } Status BatchOp::GetNextRowPullMode(TensorRow *const row) { + RETURN_UNEXPECTED_IF_NULL(row); std::unique_ptr table = std::make_unique(); child_iterator_ = std::make_unique(this, 0, 0); int32_t cur_batch_size = 0; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc index e9b61aeb8ec..1f8ef1b4b5a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/bucket_batch_by_length_op.cc @@ -60,7 +60,7 @@ Status BucketBatchByLengthOp::operator()() { TensorRow current_row; child_iterator_ = std::make_unique(this, 0, 0); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&current_row)); - while (!child_iterator_->eof_handled()) { + while (!child_iterator_->EofHandled()) { while (!current_row.empty()) { int32_t element_length; RETURN_IF_NOT_OK(ObtainElementLength(&element_length, current_row)); @@ -99,6 +99,7 @@ Status BucketBatchByLengthOp::operator()() { } Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, TensorRow element) { + RETURN_UNEXPECTED_IF_NULL(out_element_length); // call pyfunc here if given pyfunc, otherwise return 0th dimension of shape of // the single column specified in length_dependent_columns_ if (element_length_function_) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc index 67c280b3eaa..cf6fe16bae3 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_sentence_piece_vocab_op.cc @@ -52,7 +52,7 @@ Status BuildSentencePieceVocabOp::operator()() { RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); bool eoe_warning = false; // give out warning if receive more than 1 eoe - while (child_iterator_->eof_handled() == false) { + while (child_iterator_->EofHandled() == false) { while (new_row.empty() == false) { RETURN_IF_NOT_OK(sentence_queue_->EmplaceBack(new_row)); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc index be363ade17a..66bdc5eb079 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/build_vocab_op.cc @@ -107,7 +107,7 @@ Status BuildVocabOp::operator()() { } } bool eoe_warning = false; // give out warning if receive more than 1 eoe - while (child_iterator_->eof_handled() == false) { + while (child_iterator_->EofHandled() == false) { while (new_row.empty() == false) { RETURN_IF_NOT_OK(distributor_queue_->EmplaceBack(new_row)); RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc index 7d02443ac6e..6c5349cd12e 100644 ---
a/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/device_queue_op.cc @@ -593,7 +593,7 @@ Status DeviceQueueOp::SendDataToCPU() { MS_LOG(INFO) << "Device queue, sending data to CPU."; int64_t total_batch = 0; - while (!(child_iterator_->eof_handled())) { + while (!(child_iterator_->EofHandled())) { TensorRow curr_row; RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&curr_row)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc index 2a1983a4ef0..d0d5baac2ad 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/filter_op.cc @@ -62,7 +62,7 @@ Status FilterOp::operator()() { TensorRow new_row; RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); int64_t cnt = 0; - while (child_iterator_->eof_handled() == false) { + while (child_iterator_->EofHandled() == false) { while (new_row.empty() == false) { RETURN_IF_NOT_OK(worker_queues_[cnt % num_workers_]->EmplaceBack(new_row)); cnt++; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc index 2d4643eb95e..37eacfad944 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/shuffle_op.cc @@ -124,7 +124,7 @@ Status ShuffleOp::operator()() { RETURN_IF_NOT_OK(InitShuffleBuffer()); // This is our main loop exit condition, when the iterator has no more data completely. - if (child_iterator_->eof_handled()) { + if (child_iterator_->EofHandled()) { RETURN_IF_NOT_OK(out_connector_->SendEOF()); break; } @@ -214,7 +214,7 @@ Status ShuffleOp::InitShuffleBuffer() { TensorRow new_row; RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row)); - if (child_iterator_->eof_handled()) { + if (child_iterator_->EofHandled()) { MS_LOG(DEBUG) << "Shuffle operator init picked up EOF. 
No more epochs."; RETURN_IF_NOT_OK(out_connector_->SendEOF()); return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc index 3c8af4dd067..42f17df4a78 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/album_op.cc @@ -43,7 +43,7 @@ AlbumOp::AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, boo curr_row_(0) { // Set the column name map (base class field) for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->column(i).name()] = i; + column_name_id_map_[data_schema_->Column(i).Name()] = i; } io_block_queues_.Init(num_workers_, queue_size); } @@ -70,8 +70,8 @@ Status AlbumOp::PrescanEntry() { } MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << "."; - while (dirItr->hasNext()) { - Path file = dirItr->next(); + while (dirItr->HasNext()) { + Path file = dirItr->Next(); if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) { (void)image_rows_.push_back(file.toString().substr(dirname_offset_)); } else { @@ -192,7 +192,7 @@ Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { TensorPtr label; // consider templating this function to handle all ints - if (data_schema_->column(col_num).type() == DataType::DE_INT64) { + if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -201,7 +201,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label)); - } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { + } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -212,7 +212,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label)); } else { RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither int32 nor int64, it is " + - data_schema_->column(col_num).type().ToString()); + data_schema_->Column(col_num).Type().ToString()); } row->push_back(std::move(label)); return Status::OK(); @@ -221,7 +221,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { TensorPtr float_array; // consider templating this function to handle all ints - if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { + if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -230,7 +230,7 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array)); - } else if (data_schema_->column(col_num).type() == 
DataType::DE_FLOAT32) { + } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -241,14 +241,15 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array)); } else { RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither float32 nor float64, it is " + - data_schema_->column(col_num).type().ToString()); + data_schema_->Column(col_num).Type().ToString()); } row->push_back(std::move(float_array)); return Status::OK(); } Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorRow *row) { - if (data_schema_->column(col_num).type() == DataType::DE_STRING) { + RETURN_UNEXPECTED_IF_NULL(row); + if (data_schema_->Column(col_num).Type() == DataType::DE_STRING) { TensorPtr id; RETURN_IF_NOT_OK(Tensor::CreateScalar(file, &id)); row->push_back(std::move(id)); @@ -266,7 +267,7 @@ Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorRow Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorRow *row) { // hack to get the file name without extension, the 1 is to get rid of the backslash character TensorPtr empty_tensor; - RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), &empty_tensor)); + RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->Column(col_num).Type(), &empty_tensor)); row->push_back(std::move(empty_tensor)); return Status::OK(); } @@ -277,11 +278,11 @@ Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorRow *row) { // only be float32, seems like a weird limitation to impose Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { TensorPtr float_tensor; - if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { + if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) { double data = json_obj; MS_LOG(INFO) << "double found: " << json_obj << "."; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, &float_tensor)); - } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { + } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) { float data1 = json_obj; RETURN_IF_NOT_OK(Tensor::CreateScalar(data1, &float_tensor)); MS_LOG(INFO) << "float found: " << json_obj << "."; @@ -293,11 +294,11 @@ Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, // Loads a tensor with int value, we have to cast the value to type specified in the schema. 
Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) { TensorPtr int_tensor; - if (data_schema_->column(col_num).type() == DataType::DE_INT64) { + if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) { int64_t data = json_obj; MS_LOG(INFO) << "int64 found: " << json_obj << "."; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, &int_tensor)); - } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { + } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) { int32_t data = json_obj; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, &int_tensor)); MS_LOG(INFO) << "int32 found: " << json_obj << "."; @@ -349,35 +350,35 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { Status AlbumOp::loadColumnData(const std::string &file, int32_t index, nlohmann::json js, TensorRow *row) { int32_t i = index; // special case to handle - if (data_schema_->column(i).name() == "id") { + if (data_schema_->Column(i).Name() == "id") { // id is internal, special case to load from file return LoadIDTensor(file, i, row); } // find if key does not exist, insert placeholder nullptr if not found - if (js.find(data_schema_->column(i).name()) == js.end()) { + if (js.find(data_schema_->Column(i).Name()) == js.end()) { // iterator not found, push nullptr as placeholder - MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << "."; + MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->Column(i).Name() << "."; return LoadEmptyTensor(i, row); } - nlohmann::json column_value = js.at(data_schema_->column(i).name()); - MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << "."; + nlohmann::json column_value = js.at(data_schema_->Column(i).Name()); + MS_LOG(INFO) << "This column is: " << data_schema_->Column(i).Name() << "."; bool is_array = column_value.is_array(); // load single string - if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) { + if (column_value.is_string() && data_schema_->Column(i).Type() == DataType::DE_STRING) { return LoadStringTensor(column_value, i, row); } // load string array - if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) { + if (is_array && data_schema_->Column(i).Type() == DataType::DE_STRING) { return LoadStringArrayTensor(column_value, i, row); } // load image file - if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) { + if (column_value.is_string() && data_schema_->Column(i).Type() != DataType::DE_STRING) { std::string image_file_path = column_value; return LoadImageTensor(image_file_path, i, row); } // load float value - bool judge_float = (data_schema_->column(i).type() == DataType::DE_FLOAT32) || - (data_schema_->column(i).type() == DataType::DE_FLOAT64); + bool judge_float = (data_schema_->Column(i).Type() == DataType::DE_FLOAT32) || + (data_schema_->Column(i).Type() == DataType::DE_FLOAT64); if (!is_array && judge_float) { return LoadFloatTensor(column_value, i, row); } @@ -387,15 +388,15 @@ Status AlbumOp::loadColumnData(const std::string &file, int32_t index, nlohmann: } // int value if (!is_array && - (data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) { + (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) { return LoadIntTensor(column_value, i, row); } // int array if (is_array && - (data_schema_->column(i).type() == 
DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) { + (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) { return LoadIntArrayTensor(column_value, i, row); } else { - MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported."; + MS_LOG(WARNING) << "Value type for column: " << data_schema_->Column(i).Name() << " is not supported."; return Status::OK(); } } @@ -438,7 +439,7 @@ Status AlbumOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->column(i).name()] = i; + column_name_id_map_[data_schema_->Column(i).Name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc index f9c8be4423c..1e6d79a23e7 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/celeba_op.cc @@ -258,7 +258,7 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, TensorRow *row) { } RETURN_IF_NOT_OK( - Tensor::CreateEmpty(TensorShape({1, (uint32_t)image_label.second.size()}), data_schema_->column(1).type(), &label)); + Tensor::CreateEmpty(TensorShape({1, (uint32_t)image_label.second.size()}), data_schema_->Column(1).Type(), &label)); RETURN_IF_NOT_OK(label->Zero()); for (uint32_t index = 0; index < image_label.second.size(); index++) { if (image_label.second[index] == 1) { @@ -294,7 +294,7 @@ Status CelebAOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t index = 0; index < data_schema_->NumColumns(); index++) { - column_name_id_map_[data_schema_->column(index).name()] = index; + column_name_id_map_[data_schema_->Column(index).Name()] = index; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc index 6b865917ed4..4f752201dcd 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/cifar_op.cc @@ -205,8 +205,8 @@ Status CifarOp::GetCifarFiles() { Path dir_path(folder_path_); auto dirIt = Path::DirIterator::OpenDirectory(&dir_path); if (dirIt) { - while (dirIt->hasNext()) { - Path file = dirIt->next(); + while (dirIt->HasNext()) { + Path file = dirIt->Next(); if (file.Extension() == kExtension) { cifar_files_.push_back(file.toString()); } @@ -236,7 +236,7 @@ Status CifarOp::ParseCifarData() { std::shared_ptr image_tensor; RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}), - data_schema_->column(0).type(), &image_tensor)); + data_schema_->Column(0).Type(), &image_tensor)); auto itr = image_tensor->begin(); uint32_t total_pix = kCifarImageHeight * kCifarImageWidth; for (uint32_t pix = 0; pix < total_pix; ++pix) { @@ -369,7 +369,7 @@ Status CifarOp::ComputeColMap() { // set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->column(i).name()] = i; + column_name_id_map_[data_schema_->Column(i).Name()] = 
i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc index 824980c296c..ac946c0edb5 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/coco_op.cc @@ -86,7 +86,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { } std::string kImageFile = image_folder_path_ + std::string("/") + image_id; - RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); + RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image)); auto bboxRow = itr->second; std::vector bbox_row; @@ -505,7 +505,7 @@ Status CocoOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->column(i).name()] = i; + column_name_id_map_[data_schema_->Column(i).Name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc index e4e895a81ed..6a3c17f39c2 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/flickr_op.cc @@ -240,7 +240,7 @@ Status FlickrOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->column(i).name()] = i; + column_name_id_map_[data_schema_->Column(i).Name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc index 00b7ae4251f..782a2d87f61 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/image_folder_op.cc @@ -156,8 +156,8 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) { RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_name); } std::set imgs; // use this for ordering - while (dirItr->hasNext()) { - Path file = dirItr->next(); + while (dirItr->HasNext()) { + Path file = dirItr->Next(); if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) { (void)imgs.insert(file.toString().substr(dirname_offset_)); } else { @@ -182,8 +182,8 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) { Status ImageFolderOp::RecursiveWalkFolder(Path *dir) { std::shared_ptr dir_itr = Path::DirIterator::OpenDirectory(dir); RETURN_UNEXPECTED_IF_NULL(dir_itr); - while (dir_itr->hasNext()) { - Path subdir = dir_itr->next(); + while (dir_itr->HasNext()) { + Path subdir = dir_itr->Next(); if (subdir.IsDirectory()) { if (class_index_.empty() || class_index_.find(subdir.toString().substr(dirname_offset_ + 1)) != class_index_.end()) { @@ -256,8 +256,8 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se std::queue folder_paths; std::shared_ptr dir_itr = Path::DirIterator::OpenDirectory(&dir); std::unordered_set folder_names; - while (dir_itr->hasNext()) { - Path subdir = dir_itr->next(); + while 
(dir_itr->HasNext()) { + Path subdir = dir_itr->Next(); if (subdir.IsDirectory()) { folder_paths.push(subdir.toString()); if (!class_index.empty()) folder_names.insert(subdir.Basename()); @@ -283,7 +283,7 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se if (subdir.Exists() == false || dir_itr == nullptr) { RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + subdir.toString()); } - while (dir_itr->hasNext()) { + while (dir_itr->HasNext()) { if (exts.empty() || exts.find(subdir.Extension()) != exts.end()) { ++row_cnt; } @@ -298,7 +298,7 @@ Status ImageFolderOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->column(i).name()] = i; + column_name_id_map_[data_schema_->Column(i).Name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc index 7fbba5daaaf..62134cedec4 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/manifest_op.cc @@ -274,7 +274,7 @@ Status ManifestOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->column(i).name()] = i; + column_name_id_map_[data_schema_->Column(i).Name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc index ae033067d5b..beb23ec80e6 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/mindrecord_op.cc @@ -113,7 +113,7 @@ Status MindRecordOp::Init() { CHECK_FAIL_RETURN_UNEXPECTED( colname_to_ind.find(colname) != colname_to_ind.end(), "Invalid data, specified loading column name: " + colname + " does not exist in data file."); - RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->column(colname_to_ind[colname]))); + RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->Column(colname_to_ind[colname]))); } data_schema_ = std::move(tmp_schema); } @@ -271,8 +271,8 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector tensor; - const ColDescriptor &column = data_schema_->column(i_col); - DataType type = column.type(); + const ColDescriptor &column = data_schema_->Column(i_col); + DataType type = column.Type(); // Set shape CHECK_FAIL_RETURN_UNEXPECTED(column_data_type_size != 0, "Found memory size of column data type is 0."); @@ -280,8 +280,8 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector image; - RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->column(0).type(), + RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->Column(0).Type(), reinterpret_cast(pixels), &image)); image_label_pairs_.emplace_back(std::make_pair(image, labels_buf[j])); image_path_.push_back(image_names_[index]); @@ -225,8 +225,8 @@ Status MnistOp::WalkAllFiles() { std::string prefix; // empty string, used to match usage = "" (default) or usage == "all" if (usage_ == "train" || usage_ == "test") prefix 
= (usage_ == "test" ? test_prefix : train_prefix); if (dir_it != nullptr) { - while (dir_it->hasNext()) { - Path file = dir_it->next(); + while (dir_it->HasNext()) { + Path file = dir_it->Next(); std::string fname = file.Basename(); // name of the mnist file if ((fname.find(prefix + "-images") != std::string::npos) && (fname.find(img_ext) != std::string::npos)) { image_names_.push_back(file.toString()); @@ -307,7 +307,7 @@ Status MnistOp::ComputeColMap() { // set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->column(i).name()] = i; + column_name_id_map_[data_schema_->Column(i).Name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc index 64cdb151a7d..b5a81ec2a08 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/random_data_op.cc @@ -267,8 +267,8 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) { // Create a tensor for each column, then add the tensor to the row for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - const ColDescriptor current_col = data_schema_->column(i); - std::vector current_shape = current_col.shape().AsVector(); + const ColDescriptor current_col = data_schema_->Column(i); + std::vector current_shape = current_col.Shape().AsVector(); std::unique_ptr new_shape = nullptr; std::unique_ptr buf = nullptr; std::shared_ptr new_tensor = nullptr; @@ -282,7 +282,7 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) { } new_shape = std::make_unique(current_shape); - int64_t size_in_bytes = new_shape->NumOfElements() * current_col.type().SizeInBytes(); + int64_t size_in_bytes = new_shape->NumOfElements() * current_col.Type().SizeInBytes(); // Generate a random byte of data. This may cause some funny data for things like doubles,floats, bools // however the random data op is not too concerned about the physical data itself. 
@@ -296,7 +296,7 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) { return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor."); } - RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.type(), buf.get(), &new_tensor)); + RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.Type(), buf.get(), &new_tensor)); // Add this tensor to the tensor row for output (*new_row).push_back(std::move(new_tensor)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc index 715bf993ab9..1441dc9f41b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/sampler/sampler.cc @@ -75,7 +75,7 @@ Status SamplerRT::CreateSamplerTensor(std::shared_ptr *sample_ids, int64 col_desc_ = std::make_unique("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1); } TensorShape shape(std::vector(1, num_elements)); - RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, col_desc_->type(), sample_ids)); + RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, col_desc_->Type(), sample_ids)); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc index db6d1b4dd43..a9dfd672e02 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/text_file_op.cc @@ -225,7 +225,7 @@ Status TextFileOp::ComputeColMap() { // Set the column name mapping (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->column(i).name()] = i; + column_name_id_map_[data_schema_->Column(i).Name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc index fda009a0d75..763673de558 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/tf_reader_op.cc @@ -123,7 +123,7 @@ Status TFReaderOp::Init() { } if (total_rows_ == 0) { - total_rows_ = data_schema_->num_rows(); + total_rows_ = data_schema_->NumRows(); } if (total_rows_ < 0) { RETURN_STATUS_UNEXPECTED( @@ -332,12 +332,12 @@ Status TFReaderOp::LoadFile(const std::string &filename, int64_t start_offset, i Status TFReaderOp::LoadExample(const dataengine::Example *tf_file, TensorRow *out_row) { int32_t num_columns = data_schema_->NumColumns(); for (int32_t col = 0; col < num_columns; ++col) { - const ColDescriptor current_col = data_schema_->column(col); + const ColDescriptor current_col = data_schema_->Column(col); const dataengine::Features &example_features = tf_file->features(); const google::protobuf::Map &feature_map = example_features.feature(); - auto iter_column = feature_map.find(current_col.name()); + auto iter_column = feature_map.find(current_col.Name()); if (iter_column == feature_map.end()) { - RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.name() + " does not exist."); + RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.Name() + " does not exist."); } const 
dataengine::Feature &column_values_list = iter_column->second; RETURN_IF_NOT_OK(LoadFeature(out_row, column_values_list, current_col, col)); @@ -379,7 +379,7 @@ Status TFReaderOp::LoadFeature(TensorRow *tensor_row, const dataengine::Feature // into the tensor TensorShape current_shape = TensorShape::CreateUnknownRankShape(); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(num_elements, &current_shape)); - RETURN_IF_NOT_OK(Tensor::CreateFromMemory(current_shape, current_col.type(), data_ptr, &ts)); + RETURN_IF_NOT_OK(Tensor::CreateFromMemory(current_shape, current_col.Type(), data_ptr, &ts)); break; } case dataengine::Feature::KindCase::kInt64List: { @@ -406,10 +406,10 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng // kBytesList can map to the following DE types ONLY! // DE_UINT8, DE_INT8 // Must be single byte type for each element! - if (current_col.type() != DataType::DE_UINT8 && current_col.type() != DataType::DE_INT8 && - current_col.type() != DataType::DE_STRING) { - std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + - ", data type should be int8, uint8 or string, but got " + current_col.type().ToString(); + if (current_col.Type() != DataType::DE_UINT8 && current_col.Type() != DataType::DE_INT8 && + current_col.Type() != DataType::DE_STRING) { + std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + + ", data type should be int8, uint8 or string, but got " + current_col.Type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -417,7 +417,7 @@ *num_elements = bytes_list.value_size(); - if (current_col.type() == DataType::DE_STRING) { + if (current_col.Type() == DataType::DE_STRING) { TensorShape shape = TensorShape::CreateScalar(); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &shape)); RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, shape, tensor)); @@ -436,14 +436,14 @@ int64_t pad_size = max_size; // if user provides a shape in the form of [-1, d1, 2d, ... , dn], we need to pad to d1 * d2 * ... * dn - if (current_col.hasShape()) { - TensorShape cur_shape = current_col.shape(); + if (current_col.HasShape()) { + TensorShape cur_shape = current_col.Shape(); if (cur_shape.Size() >= 2 && cur_shape[0] == TensorShape::kDimUnknown) { int64_t new_pad_size = 1; for (int i = 1; i < cur_shape.Size(); ++i) { if (cur_shape[i] == TensorShape::kDimUnknown) { std::string err_msg = - "Invalid data, more than one unknown dimension in the shape of column: " + current_col.name(); + "Invalid data, more than one unknown dimension in the shape of column: " + current_col.Name(); RETURN_STATUS_UNEXPECTED(err_msg); } new_pad_size *= cur_shape[i]; @@ -451,7 +451,7 @@ pad_size = new_pad_size; } else { if (cur_shape.known() && cur_shape.NumOfElements() != max_size) { - std::string err_msg = "Shape in schema's column '" + current_col.name() + "' is incorrect." + + std::string err_msg = "Shape in schema's column '" + current_col.Name() + "' is incorrect."
+ "\nshape received: " + cur_shape.ToString() + "\ntotal elements in shape received: " + std::to_string(cur_shape.NumOfElements()) + "\nexpected total elements in shape: " + std::to_string(max_size); @@ -463,7 +463,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng // know how many elements there are and the total bytes, create tensor here: TensorShape current_shape = TensorShape::CreateScalar(); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape((*num_elements) * pad_size, ¤t_shape)); - RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, current_shape, current_col.type(), pad_size, tensor)); + RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, current_shape, current_col.Type(), pad_size, tensor)); return Status::OK(); } @@ -472,9 +472,9 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor ¤t_col, const dataeng int32_t *num_elements, std::unique_ptr *float_array) { // KFloatList can only map to DE types: // DE_FLOAT32 - if (current_col.type() != DataType::DE_FLOAT32) { - std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + - ", data type should be string, but got " + current_col.type().ToString(); + if (current_col.Type() != DataType::DE_FLOAT32) { + std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + + ", data type should be string, but got " + current_col.Type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -494,26 +494,26 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor ¤t_col, const dataeng // Determines which template type to use and calls LoadIntList Status TFReaderOp::LoadIntListSwitch(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, int32_t *num_elements, std::shared_ptr *tensor) { - if (current_col.type() == DataType::DE_UINT64) { + if (current_col.Type() == DataType::DE_UINT64) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.type() == DataType::DE_INT64) { + } else if (current_col.Type() == DataType::DE_INT64) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.type() == DataType::DE_UINT32) { + } else if (current_col.Type() == DataType::DE_UINT32) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.type() == DataType::DE_INT32) { + } else if (current_col.Type() == DataType::DE_INT32) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.type() == DataType::DE_UINT16) { + } else if (current_col.Type() == DataType::DE_UINT16) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.type() == DataType::DE_INT16) { + } else if (current_col.Type() == DataType::DE_INT16) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.type() == DataType::DE_UINT8) { + } else if (current_col.Type() == DataType::DE_UINT8) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); - } else if (current_col.type() == DataType::DE_INT8) { + } else if (current_col.Type() == DataType::DE_INT8) { RETURN_IF_NOT_OK(LoadIntList(current_col, column_values_list, num_elements, tensor)); } else { - std::string err_msg = "Invalid data, invalid datatype for Tensor at column: " + current_col.name() + + std::string err_msg = "Invalid data, invalid datatype 
for Tensor at column: " + current_col.Name() + ", data type should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8" + - ", but got " + current_col.type().ToString(); + ", but got " + current_col.Type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -525,9 +525,9 @@ Status TFReaderOp::LoadIntListSwitch(const ColDescriptor ¤t_col, const dat template Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list, int32_t *num_elements, std::shared_ptr *tensor) { - if (!(current_col.type().IsInt())) { - std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() + - ", data type should be int, but got " + current_col.type().ToString(); + if (!(current_col.Type().IsInt())) { + std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() + + ", data type should be int, but got " + current_col.Type().ToString(); RETURN_STATUS_UNEXPECTED(err_msg); } @@ -540,7 +540,7 @@ Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengin // know how many elements there are, create tensor here: TensorShape current_shape = TensorShape::CreateUnknownRankShape(); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, ¤t_shape)); - RETURN_IF_NOT_OK(Tensor::CreateEmpty(current_shape, current_col.type(), tensor)); + RETURN_IF_NOT_OK(Tensor::CreateEmpty(current_shape, current_col.Type(), tensor)); int64_t i = 0; auto it = (*tensor)->begin(); @@ -719,7 +719,7 @@ Status TFReaderOp::ComputeColMap() { // Construct the column name map for this operator (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->column(i).name()] = i; + column_name_id_map_[data_schema_->Column(i).Name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc index fa94aef0d23..42c69d912e9 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/source/voc_op.cc @@ -83,8 +83,8 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { std::shared_ptr image, target; const std::string kTargetFile = folder_path_ + std::string(kSegmentationClassFolder) + image_id + std::string(kSegmentationExtension); - RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); - RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->column(1), &target)); + RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image)); + RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->Column(1), &target)); (*trow) = TensorRow(row_id, {std::move(image), std::move(target)}); path_list = {kImageFile, kTargetFile}; } else if (task_type_ == TaskType::Detection) { @@ -92,7 +92,7 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) { TensorRow annotation; const std::string kAnnotationFile = folder_path_ + std::string(kAnnotationsFolder) + image_id + std::string(kAnnotationExtension); - RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image)); + RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image)); RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, &annotation)); trow->setId(row_id); trow->push_back(std::move(image)); @@ -326,7 +326,7 @@ Status 
VOCOp::ComputeColMap() { // Set the column name map (base class field) if (column_name_id_map_.empty()) { for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) { - column_name_id_map_[data_schema_->column(i).name()] = i; + column_name_id_map_[data_schema_->Column(i).Name()] = i; } } else { MS_LOG(WARNING) << "Column name map is already set!"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc b/mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc index 6365622c8b4..b7240006c8d 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/execution_tree.cc @@ -62,6 +62,7 @@ ExecutionTree::~ExecutionTree() { // provides it with a link to the tree. A node cannot form any relationships (parent/child) with // other nodes unless they are associated with the same tree. Status ExecutionTree::AssociateNode(const std::shared_ptr &op) { + RETURN_UNEXPECTED_IF_NULL(op); // If we are already a part of the tree, no-op if (op->tree_ == this) { return Status::OK(); @@ -88,6 +89,7 @@ Status ExecutionTree::AssociateNode(const std::shared_ptr &op) { // Sets the root node of the tree Status ExecutionTree::AssignRoot(const std::shared_ptr &op) { + RETURN_UNEXPECTED_IF_NULL(op); // Tree must be in building state before we can assign root to it if (tree_state_ != kDeTStateBuilding) { std::string err_msg = @@ -121,6 +123,9 @@ void ExecutionTree::Print(std::ostream &out, const std::shared_ptr &o // A helper functions for doing the recursive printing void ExecutionTree::PrintNode(std::ostream &out, const std::shared_ptr &dataset_op, std::string indent, bool last, bool detailed) const { + if (dataset_op == nullptr) { + return; + } // Decide which printer to use based on detailed arg. if (!detailed) { out << indent << "+- " << *dataset_op; diff --git a/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_data_impl.cc b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_data_impl.cc index 100cdb0c605..56d9fa7fd7a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_data_impl.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_data_impl.cc @@ -41,6 +41,7 @@ GraphDataImpl::GraphDataImpl(std::string dataset_file, int32_t num_workers, bool GraphDataImpl::~GraphDataImpl() {} Status GraphDataImpl::GetAllNodes(NodeType node_type, std::shared_ptr *out) { + RETURN_UNEXPECTED_IF_NULL(out); auto itr = node_type_map_.find(node_type); if (itr == node_type_map_.end()) { std::string err_msg = "Invalid node type:" + std::to_string(node_type); @@ -54,6 +55,7 @@ Status GraphDataImpl::GetAllNodes(NodeType node_type, std::shared_ptr *o template Status GraphDataImpl::CreateTensorByVector(const std::vector> &data, DataType type, std::shared_ptr *out) { + RETURN_UNEXPECTED_IF_NULL(out); if (!type.IsCompatible()) { RETURN_STATUS_UNEXPECTED("Data type not compatible"); } @@ -96,6 +98,7 @@ Status GraphDataImpl::ComplementVector(std::vector> *data, size_t } Status GraphDataImpl::GetAllEdges(EdgeType edge_type, std::shared_ptr *out) { + RETURN_UNEXPECTED_IF_NULL(out); auto itr = edge_type_map_.find(edge_type); if (itr == edge_type_map_.end()) { std::string err_msg = "Invalid edge type:" + std::to_string(edge_type); @@ -110,6 +113,7 @@ Status GraphDataImpl::GetNodesFromEdges(const std::vector &edge_list if (edge_list.empty()) { RETURN_STATUS_UNEXPECTED("Input edge_list is empty"); } + RETURN_UNEXPECTED_IF_NULL(out); std::vector> node_list; node_list.reserve(edge_list.size()); @@ -156,6 +160,7 @@ Status GraphDataImpl::GetAllNeighbors(const std::vector 
&node_list, const OutputFormat &format, std::shared_ptr *out) { CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); RETURN_IF_NOT_OK(CheckNeighborType(neighbor_type)); + RETURN_UNEXPECTED_IF_NULL(out); std::vector> neighbors; @@ -251,6 +256,7 @@ Status GraphDataImpl::GetSampledNeighbors(const std::vector &node_li for (const auto &type : neighbor_types) { RETURN_IF_NOT_OK(CheckNeighborType(type)); } + RETURN_UNEXPECTED_IF_NULL(out); std::vector> neighbors_vec(node_list.size()); for (size_t node_idx = 0; node_idx < node_list.size(); ++node_idx) { std::shared_ptr input_node; @@ -285,6 +291,7 @@ Status GraphDataImpl::NegativeSample(const std::vector &data, const size_t *start_index, const std::unordered_set &exclude_data, int32_t samples_num, std::vector *out_samples) { CHECK_FAIL_RETURN_UNEXPECTED(!data.empty(), "Input data is empty."); + RETURN_UNEXPECTED_IF_NULL(start_index); size_t index = *start_index; for (size_t i = index; i < shuffled_ids.size(); ++i) { ++index; @@ -305,6 +312,7 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector &node CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty."); RETURN_IF_NOT_OK(CheckSamplesNum(samples_num)); RETURN_IF_NOT_OK(CheckNeighborType(neg_neighbor_type)); + RETURN_UNEXPECTED_IF_NULL(out); const std::vector &all_nodes = node_type_map_[neg_neighbor_type]; std::vector shuffled_id(all_nodes.size()); @@ -321,9 +329,9 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector &node std::vector neighbors; RETURN_IF_NOT_OK(node->GetAllNeighbors(neg_neighbor_type, &neighbors)); std::unordered_set exclude_nodes; - std::transform(neighbors.begin(), neighbors.end(), - std::insert_iterator>(exclude_nodes, exclude_nodes.begin()), - [](const NodeIdType node) { return node; }); + (void)std::transform(neighbors.begin(), neighbors.end(), + std::insert_iterator>(exclude_nodes, exclude_nodes.begin()), + [](const NodeIdType node) { return node; }); neg_neighbors_vec[node_idx].emplace_back(node->id()); if (all_nodes.size() > exclude_nodes.size()) { while (neg_neighbors_vec[node_idx].size() < samples_num + 1) { @@ -355,6 +363,7 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector &node Status GraphDataImpl::RandomWalk(const std::vector &node_list, const std::vector &meta_path, float step_home_param, float step_away_param, NodeIdType default_node, std::shared_ptr *out) { + RETURN_UNEXPECTED_IF_NULL(out); RETURN_IF_NOT_OK(random_walk_.Build(node_list, meta_path, step_home_param, step_away_param, default_node)); std::vector> walks; RETURN_IF_NOT_OK(random_walk_.SimulateWalk(&walks)); @@ -363,6 +372,7 @@ Status GraphDataImpl::RandomWalk(const std::vector &node_list, const } Status GraphDataImpl::GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr *out_feature) { + RETURN_UNEXPECTED_IF_NULL(out_feature); auto itr = default_node_feature_map_.find(feature_type); if (itr == default_node_feature_map_.end()) { std::string err_msg = "Invalid feature type:" + std::to_string(feature_type); @@ -374,6 +384,7 @@ Status GraphDataImpl::GetNodeDefaultFeature(FeatureType feature_type, std::share } Status GraphDataImpl::GetEdgeDefaultFeature(FeatureType feature_type, std::shared_ptr *out_feature) { + RETURN_UNEXPECTED_IF_NULL(out_feature); auto itr = default_edge_feature_map_.find(feature_type); if (itr == default_edge_feature_map_.end()) { std::string err_msg = "Invalid feature type:" + std::to_string(feature_type); @@ -390,6 +401,7 @@ Status GraphDataImpl::GetNodeFeature(const std::shared_ptr 
&nodes, RETURN_STATUS_UNEXPECTED("Input nodes is empty"); } CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty"); + RETURN_UNEXPECTED_IF_NULL(out); TensorRow tensors; for (const auto &f_type : feature_types) { std::shared_ptr default_feature; @@ -436,6 +448,7 @@ Status GraphDataImpl::GetNodeFeatureSharedMemory(const std::shared_ptr & if (!nodes || nodes->Size() == 0) { RETURN_STATUS_UNEXPECTED("Input nodes is empty"); } + RETURN_UNEXPECTED_IF_NULL(out); TensorShape shape = nodes->shape().AppendDim(2); std::shared_ptr fea_tensor; RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, DataType(DataType::DE_INT64), &fea_tensor)); @@ -478,6 +491,7 @@ Status GraphDataImpl::GetEdgeFeature(const std::shared_ptr &edges, RETURN_STATUS_UNEXPECTED("Input edges is empty"); } CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty"); + RETURN_UNEXPECTED_IF_NULL(out); TensorRow tensors; for (const auto &f_type : feature_types) { std::shared_ptr default_feature; @@ -520,6 +534,7 @@ Status GraphDataImpl::GetEdgeFeatureSharedMemory(const std::shared_ptr & if (!edges || edges->Size() == 0) { RETURN_STATUS_UNEXPECTED("Input edges is empty"); } + RETURN_UNEXPECTED_IF_NULL(out); TensorShape shape = edges->shape().AppendDim(2); std::shared_ptr fea_tensor; RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, DataType(DataType::DE_INT64), &fea_tensor)); @@ -554,14 +569,15 @@ Status GraphDataImpl::Init() { } Status GraphDataImpl::GetMetaInfo(MetaInfo *meta_info) { + RETURN_UNEXPECTED_IF_NULL(meta_info); meta_info->node_type.resize(node_type_map_.size()); - std::transform(node_type_map_.begin(), node_type_map_.end(), meta_info->node_type.begin(), - [](auto itr) { return itr.first; }); + (void)std::transform(node_type_map_.begin(), node_type_map_.end(), meta_info->node_type.begin(), + [](auto itr) { return itr.first; }); std::sort(meta_info->node_type.begin(), meta_info->node_type.end()); meta_info->edge_type.resize(edge_type_map_.size()); - std::transform(edge_type_map_.begin(), edge_type_map_.end(), meta_info->edge_type.begin(), - [](auto itr) { return itr.first; }); + (void)std::transform(edge_type_map_.begin(), edge_type_map_.end(), meta_info->edge_type.begin(), + [](auto itr) { return itr.first; }); std::sort(meta_info->edge_type.begin(), meta_info->edge_type.end()); for (const auto &node : node_type_map_) { @@ -594,6 +610,7 @@ Status GraphDataImpl::GetMetaInfo(MetaInfo *meta_info) { #ifdef ENABLE_PYTHON Status GraphDataImpl::GraphInfo(py::dict *out) { + RETURN_UNEXPECTED_IF_NULL(out); MetaInfo meta_info; RETURN_IF_NOT_OK(GetMetaInfo(&meta_info)); (*out)["node_type"] = py::cast(meta_info.node_type); @@ -616,6 +633,7 @@ Status GraphDataImpl::LoadNodeAndEdge() { } Status GraphDataImpl::GetNodeByNodeId(NodeIdType id, std::shared_ptr *node) { + RETURN_UNEXPECTED_IF_NULL(node); auto itr = node_id_map_.find(id); if (itr == node_id_map_.end()) { std::string err_msg = "Invalid node id:" + std::to_string(id); @@ -627,6 +645,7 @@ Status GraphDataImpl::GetNodeByNodeId(NodeIdType id, std::shared_ptr *node } Status GraphDataImpl::GetEdgeByEdgeId(EdgeIdType id, std::shared_ptr *edge) { + RETURN_UNEXPECTED_IF_NULL(edge); auto itr = edge_id_map_.find(id); if (itr == edge_id_map_.end()) { std::string err_msg = "Invalid edge id:" + std::to_string(id); @@ -682,6 +701,7 @@ Status GraphDataImpl::RandomWalkBase::Build(const std::vector &node_ } Status GraphDataImpl::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node, std::vector *walk_path) { + RETURN_UNEXPECTED_IF_NULL(walk_path); // 
Simulate a random walk starting from start node. auto walk = std::vector(1, start_node); // walk is an vector // walk simulate @@ -722,6 +742,7 @@ Status GraphDataImpl::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node, } Status GraphDataImpl::RandomWalkBase::SimulateWalk(std::vector> *walks) { + RETURN_UNEXPECTED_IF_NULL(walks); for (int32_t i = 0; i < num_walks_; ++i) { for (const auto &node : node_list_) { std::vector walk; @@ -734,6 +755,7 @@ Status GraphDataImpl::RandomWalkBase::SimulateWalk(std::vector *node_probability) { + RETURN_UNEXPECTED_IF_NULL(node_probability); // Generate alias nodes std::shared_ptr node; RETURN_IF_NOT_OK(graph_->GetNodeByNodeId(node_id, &node)); @@ -749,6 +771,7 @@ Status GraphDataImpl::RandomWalkBase::GetNodeProbability(const NodeIdType &node_ Status GraphDataImpl::RandomWalkBase::GetEdgeProbability(const NodeIdType &src, const NodeIdType &dst, uint32_t meta_path_index, std::shared_ptr *edge_probability) { + RETURN_UNEXPECTED_IF_NULL(edge_probability); // Get the alias edge setup lists for a given edge. std::shared_ptr src_node; RETURN_IF_NOT_OK(graph_->GetNodeByNodeId(src, &src_node)); @@ -760,6 +783,8 @@ Status GraphDataImpl::RandomWalkBase::GetEdgeProbability(const NodeIdType &src, std::vector dst_neighbors; RETURN_IF_NOT_OK(dst_node->GetAllNeighbors(meta_path_[meta_path_index + 1], &dst_neighbors, true)); + CHECK_FAIL_RETURN_UNEXPECTED(step_home_param_ != 0, "Invalid data, step home parameter can't be zero."); + CHECK_FAIL_RETURN_UNEXPECTED(step_away_param_ != 0, "Invalid data, step away parameter can't be zero."); std::sort(dst_neighbors.begin(), dst_neighbors.end()); std::vector non_normalized_probability; for (const auto &dst_nbr : dst_neighbors) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_shared_memory.cc b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_shared_memory.cc index 0bf4575517c..e77525b7770 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_shared_memory.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/gnn/graph_shared_memory.cc @@ -17,6 +17,8 @@ #include "minddata/dataset/engine/gnn/graph_shared_memory.h" #include +#include "debug/common.h" +#include "utils/ms_utils.h" #include "minddata/dataset/util/log_adapter.h" namespace mindspore { @@ -51,7 +53,9 @@ GraphSharedMemory::~GraphSharedMemory() { Status GraphSharedMemory::CreateSharedMemory() { if (memory_key_ == -1) { // ftok to generate unique key - memory_key_ = ftok(mr_file_.data(), kGnnSharedMemoryId); + auto realpath = Common::GetRealPath(mr_file_); + CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), "Get real path failed, path=" + mr_file_); + memory_key_ = ftok(common::SafeCStr(realpath.value()), kGnnSharedMemoryId); CHECK_FAIL_RETURN_UNEXPECTED(memory_key_ != -1, "Failed to get key of shared memory. file_name:" + mr_file_); std::stringstream stream; stream << std::hex << memory_key_; @@ -89,6 +93,7 @@ Status GraphSharedMemory::DeleteSharedMemory() { Status GraphSharedMemory::SharedMemoryImpl(const int &shmflg) { // shmget returns an identifier in shmid + CHECK_FAIL_RETURN_UNEXPECTED(memory_size_ >= 0, "Invalid memory size, should be greater than zero."); int shmid = shmget(memory_key_, memory_size_, shmflg); CHECK_FAIL_RETURN_UNEXPECTED(shmid != -1, "Failed to get shared memory. 
key=0x" + memory_key_str_); @@ -103,6 +108,7 @@ Status GraphSharedMemory::SharedMemoryImpl(const int &shmflg) { Status GraphSharedMemory::InsertData(const uint8_t *data, int64_t len, int64_t *offset) { CHECK_FAIL_RETURN_UNEXPECTED(data, "Input data is nullptr."); CHECK_FAIL_RETURN_UNEXPECTED(len > 0, "Input len is invalid."); + CHECK_FAIL_RETURN_UNEXPECTED(offset, "Input offset is nullptr."); std::lock_guard lck(mutex_); CHECK_FAIL_RETURN_UNEXPECTED((memory_size_ - memory_offset_ >= len), diff --git a/mindspore/ccsrc/minddata/dataset/engine/gpu_item_connector.h b/mindspore/ccsrc/minddata/dataset/engine/gpu_item_connector.h index 680fdc27561..716fd23a909 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/gpu_item_connector.h +++ b/mindspore/ccsrc/minddata/dataset/engine/gpu_item_connector.h @@ -46,6 +46,7 @@ class GpuItemConnector : public Connector> { } Status Pop(int32_t worker_id, std::vector *result) noexcept override { + RETURN_UNEXPECTED_IF_NULL(result); { MS_ASSERT(worker_id < num_consumers_); std::unique_lock lock(m_); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc index 6a062658ecd..a08adbcdabb 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/dataset_node.cc @@ -30,6 +30,7 @@ namespace dataset { // Helper function to compute a default shuffle size Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, int64_t *shuffle_size) { + RETURN_UNEXPECTED_IF_NULL(shuffle_size); const int64_t average_files_multiplier = 4; const int64_t shuffle_max = 10000; int64_t avg_rows_per_file = 0; @@ -59,6 +60,7 @@ Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_ro // Helper function to inject a shuffle operator over top of current operator being built Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows, int32_t connector_que_size, std::shared_ptr *shuffle_op) { + RETURN_UNEXPECTED_IF_NULL(shuffle_op); int64_t shuffle_size = 0; RETURN_IF_NOT_OK(ComputeShuffleSize(num_files, num_devices, num_rows, total_rows, &shuffle_size)); MS_LOG(INFO) << "Dataset::AddShuffleOp - num_rows: " << num_rows << ", shuffle_size: " << shuffle_size; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.cc index fb776c292b6..bf622d6aa71 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/map_node.cc @@ -59,6 +59,7 @@ void MapNode::Print(std::ostream &out) const { } Status MapNode::Build(std::vector> *const node_ops) { + RETURN_UNEXPECTED_IF_NULL(node_ops); std::vector> tensor_ops; // Build tensorOp from tensorOperation vector @@ -131,12 +132,16 @@ Status MapNode::ValidateParams() { // Visitor accepting method for IRNodePass Status MapNode::Accept(IRNodePass *const p, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(p); + RETURN_UNEXPECTED_IF_NULL(modified); // Downcast shared pointer then call visitor return p->Visit(shared_from_base(), modified); } // Visitor accepting method for IRNodePass Status MapNode::AcceptAfter(IRNodePass *const p, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(p); + RETURN_UNEXPECTED_IF_NULL(modified); // Downcast shared pointer then call visitor return p->VisitAfter(shared_from_base(), modified); 
} @@ -147,6 +152,7 @@ void MapNode::setOperations(const std::vector> std::vector> MapNode::operations() { return operations_; } Status MapNode::to_json(nlohmann::json *out_json) { + RETURN_UNEXPECTED_IF_NULL(out_json); nlohmann::json args; args["num_parallel_workers"] = num_workers_; args["input_columns"] = input_columns_; @@ -160,6 +166,7 @@ Status MapNode::to_json(nlohmann::json *out_json) { std::vector ops; std::vector cbs; for (auto op : operations_) { + RETURN_UNEXPECTED_IF_NULL(op); nlohmann::json op_args; RETURN_IF_NOT_OK(op->to_json(&op_args)); if (op->Name() == "PyFuncOp") { @@ -172,8 +179,8 @@ Status MapNode::to_json(nlohmann::json *out_json) { } } args["operations"] = ops; - std::transform(callbacks_.begin(), callbacks_.end(), std::back_inserter(cbs), - [](std::shared_ptr cb) -> int32_t { return cb->step_size(); }); + (void)std::transform(callbacks_.begin(), callbacks_.end(), std::back_inserter(cbs), + [](std::shared_ptr cb) -> int32_t { return cb != nullptr ? cb->step_size() : 0; }); args["callback"] = cbs; *out_json = args; return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc index 54d191be18a..543ab401990 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/album_node.cc @@ -106,8 +106,8 @@ Status AlbumNode::GetDatasetSize(const std::shared_ptr &size_ } std::set extensions = {".json", ".JSON"}; - while (dirItr->hasNext()) { - Path file = dirItr->next(); + while (dirItr->HasNext()) { + Path file = dirItr->Next(); if (extensions.empty() || extensions.find(file.Extension()) != extensions.end()) { num_rows += 1; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.cc index 5b7a676eb62..b13ce660f5a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/generator_node.cc @@ -73,9 +73,9 @@ Status GeneratorNode::Build(std::vector> *const node_ RETURN_IF_NOT_OK(data_schema->LoadSchemaString(schema_json_string, {})); for (int32_t i = 0; i < data_schema->NumColumns(); i++) { - ColDescriptor col = data_schema->column(i); - column_names_.push_back(col.name()); - column_types_.push_back((col.type())); + ColDescriptor col = data_schema->Column(i); + column_names_.push_back(col.Name()); + column_types_.push_back((col.Type())); } } std::shared_ptr sampler_rt = nullptr; diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc index e1183c49389..fee51c2489b 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/random_node.cc @@ -131,7 +131,7 @@ Status RandomNode::GetDatasetSize(const std::shared_ptr &size *dataset_size = dataset_size_; return Status::OK(); } - int64_t num_rows = total_rows_ != 0 ? total_rows_ : data_schema_->num_rows(); + int64_t num_rows = total_rows_ != 0 ? 
total_rows_ : data_schema_->NumRows(); *dataset_size = num_rows; dataset_size_ = *dataset_size; return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc index f68e798ce1e..4ccfe0ade04 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/datasetops/source/tf_record_node.cc @@ -22,6 +22,7 @@ #include #include +#include "debug/common.h" #include "minddata/dataset/engine/datasetops/source/tf_reader_op.h" #include "minddata/dataset/engine/jagged_connector.h" #include "minddata/dataset/engine/opt/pass.h" @@ -58,13 +59,9 @@ Status TFRecordNode::ValidateParams() { } for (const auto &f : dataset_files_) { - Path dataset_file(f); - if (!dataset_file.Exists()) { - std::string err_msg = "TFRecordNode: dataset file: [" + f + "] is invalid or does not exist."; - MS_LOG(ERROR) << err_msg; - - return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg); - } + auto realpath = Common::GetRealPath(f); + CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), + "TFRecordNode: dataset file: [" + f + "] is invalid or does not exist."); } if (num_samples_ < 0) { @@ -107,6 +104,7 @@ Status TFRecordNode::ValidateParams() { // Function to build TFRecordNode Status TFRecordNode::Build(std::vector> *const node_ops) { + RETURN_UNEXPECTED_IF_NULL(node_ops); // Sort the datasets file in a lexicographical order std::vector sorted_dir_files = dataset_files_; std::sort(sorted_dir_files.begin(), sorted_dir_files.end()); @@ -165,6 +163,8 @@ Status TFRecordNode::GetShardId(int32_t *const shard_id) { // Get Dataset size Status TFRecordNode::GetDatasetSize(const std::shared_ptr &size_getter, bool estimate, int64_t *dataset_size) { + RETURN_UNEXPECTED_IF_NULL(size_getter); + RETURN_UNEXPECTED_IF_NULL(dataset_size); if (dataset_size_ > 0) { *dataset_size = dataset_size_; return Status::OK(); @@ -189,6 +189,7 @@ Status TFRecordNode::GetDatasetSize(const std::shared_ptr &si // Get the file list of the specific shard ID Status TFRecordNode::GetShardFileList(std::vector *shard_filenames) { + RETURN_UNEXPECTED_IF_NULL(shard_filenames); if (!shard_filenames->empty()) { RETURN_STATUS_UNEXPECTED("The initial file list must be empty."); } @@ -201,6 +202,7 @@ Status TFRecordNode::GetShardFileList(std::vector *shard_filenames) } Status TFRecordNode::to_json(nlohmann::json *out_json) { + RETURN_UNEXPECTED_IF_NULL(out_json); nlohmann::json args; args["num_parallel_workers"] = num_workers_; args["dataset_files"] = dataset_files_; @@ -262,6 +264,7 @@ Status TFRecordNode::from_json(nlohmann::json json_obj, std::shared_ptr *sampler) { + RETURN_UNEXPECTED_IF_NULL(sampler); bool shuffle_files = (shuffle_ == ShuffleMode::kGlobal || shuffle_ == ShuffleMode::kFiles); *sampler = SelectSampler(num_samples_, shuffle_files, num_shards_, shard_id_); return Status::OK(); @@ -281,12 +284,16 @@ Status TFRecordNode::MakeSimpleProducer() { // Visitor accepting method for IRNodePass Status TFRecordNode::Accept(IRNodePass *p, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(p); + RETURN_UNEXPECTED_IF_NULL(modified); // Downcast shared pointer then call visitor return p->Visit(shared_from_base(), modified); } // Visitor accepting method for IRNodePass Status TFRecordNode::AcceptAfter(IRNodePass *const p, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(p); + RETURN_UNEXPECTED_IF_NULL(modified); // Downcast shared pointer then call 
visitor return p->VisitAfter(shared_from_base(), modified); } diff --git a/mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h b/mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h index dea086fe744..1a610b3f177 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h +++ b/mindspore/ccsrc/minddata/dataset/engine/jagged_connector.h @@ -43,6 +43,7 @@ class JaggedConnector : public Connector { } Status Pop(int32_t worker_id, TensorRow *result) noexcept override { + RETURN_UNEXPECTED_IF_NULL(result); { MS_ASSERT(worker_id < num_consumers_); std::unique_lock lock(m_); @@ -53,7 +54,7 @@ class JaggedConnector : public Connector { } RETURN_IF_NOT_OK(queues_[pop_from_]->PopFront(result)); - if (result->eoe()) { + if (result != nullptr && result->eoe()) { is_queue_finished_[pop_from_] = true; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc index e211f03b228..753fad75296 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/optional/tensor_op_fusion_pass.cc @@ -32,12 +32,14 @@ namespace mindspore { namespace dataset { Status TensorOpFusionPass::Visit(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); std::vector> ops = node->operations(); // start temporary code, to deal with pre-built TensorOperation std::vector pattern = {kDecodeOp, kRandomCropAndResizeOp}; auto itr = std::search(ops.begin(), ops.end(), pattern.begin(), pattern.end(), - [](auto op, const std::string &nm) { return op->Name() == nm; }); + [](auto op, const std::string &nm) { return op != nullptr ? op->Name() == nm : false; }); if (itr != ops.end()) { MS_LOG(WARNING) << "Fusing pre-build Decode and RandomCropResize into one pre-build."; auto fused_op = dynamic_cast((*(itr + 1))->Build().get()); @@ -52,7 +54,7 @@ Status TensorOpFusionPass::Visit(std::shared_ptr node, bool *const modi // logic below is for non-prebuilt TensorOperation pattern = {vision::kDecodeOperation, vision::kRandomResizedCropOperation}; itr = std::search(ops.begin(), ops.end(), pattern.begin(), pattern.end(), - [](auto op, const std::string &nm) { return op->Name() == nm; }); + [](auto op, const std::string &nm) { return op != nullptr ? op->Name() == nm : false; }); // return here if no pattern is found RETURN_OK_IF_TRUE(itr == ops.end()); diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/post/auto_worker_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/post/auto_worker_pass.cc index ead6bd4d69f..da4d0887321 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/opt/post/auto_worker_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/post/auto_worker_pass.cc @@ -27,6 +27,8 @@ namespace dataset { // this will become the RootNode:DatasetNode when it is turned on Status AutoWorkerPass::RunOnTree(std::shared_ptr root_ir, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(root_ir); + RETURN_UNEXPECTED_IF_NULL(modified); uint8_t config = GlobalContext::config_manager()->get_auto_worker_config(); OpWeightPass pass(kOpWeightConfigs[config < kOpWeightConfigs.size() ? 
config : 0]); @@ -46,6 +48,8 @@ Status AutoWorkerPass::RunOnTree(std::shared_ptr root_ir, bool *con // get the maximum weight of all the ops, this value is used to ensure the ratio of num_workers between ops float max_weight = 0; for (const auto &p : pass.weight_profile_) max_weight = std::max(max_weight, p.second); + + CHECK_FAIL_RETURN_UNEXPECTED(max_weight != 0, "Internal error, doesn't allow divide zero."); RETURN_IF_NOT_OK(pass.Run(root_ir, modified)); constexpr size_t max_num_ops = 3; if (pass.parallel_ops_.size() > max_num_ops) { @@ -53,6 +57,7 @@ Status AutoWorkerPass::RunOnTree(std::shared_ptr root_ir, bool *con << "1 batch and 1 map. AutoNumWorker may not be optimal for usage on complex pipelines."; } + CHECK_FAIL_RETURN_UNEXPECTED(pass.weight_sum_ != 0, "Internal error, doesn't allow divide zero."); for (auto &p : pass.parallel_ops_) { // get the num worker via the weight ratio int32_t num_workers = std::ceil((thread_cnt_ * p.second) / (pass.weight_sum_ * num_shards)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc index 778c1262b5d..a7d98ccc361 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/post/repeat_pass.cc @@ -33,6 +33,8 @@ RepeatPass::RepeatPass() // Identifies the subtree below this node as being in a repeated path of the tree. Status RepeatPass::Visit(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); // If this is an infinite repeat under infinite repeat/epoch, adjust current num_repeats_. // Otherwise, after multiplication it would become positive and this repeat wouldn't run infinitely. if (node->Count() == DatasetOp::kInfiniteRepeat && num_repeats_ < 0) { @@ -56,6 +58,8 @@ Status RepeatPass::Visit(std::shared_ptr node, bool *const modified) // Identifies the subtree below this node as being in a repeated path of the tree. Status RepeatPass::Visit(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); // Get the total number of epochs from the EpochCtrlOp parameter num_epochs_ = node->Count(); // Every node below this EpochCtrlOp should be repeated for num_epochs_ times. @@ -69,6 +73,8 @@ Status RepeatPass::Visit(std::shared_ptr node, bool *const modifi #ifndef ENABLE_ANDROID // Identifies the subtree below this node as being in a cache merge path Status RepeatPass::Visit(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); // Turn on the flag that we're under a merge op is_merge_ = true; return Status::OK(); @@ -76,6 +82,8 @@ Status RepeatPass::Visit(std::shared_ptr node, bool *const modif // Identifies the subtree below this node as being cached Status RepeatPass::Visit(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); // Turn on the flag that we're under a merge op is_cached_ = true; return Status::OK(); @@ -84,6 +92,8 @@ Status RepeatPass::Visit(std::shared_ptr node, bool *const modified) // Hooks up any identified eoe nodes under this repeat. Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); // We are a repeat op in the descendant tree of a merge op, then we take the saved lookup up // and set its total repeats. 
It is important that the op is removed from the save area, // because the merge op above us may also take action on it later for a different case when @@ -103,12 +113,16 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modi // The total repeats of nodes above this Repeat(n) have nothing to do with this RepeatOp's parameter n. // But num_repeats_ has been multiplied by n during this Repeat(n)'s PreRunOnNode, // so we divide num_repeats_ by n to be able to correctly set total repeats for nodes above this RepeatOp. + CHECK_FAIL_RETURN_UNEXPECTED(node->Count() != 0, "Invalid data, the number of node can't be 0."); num_repeats_ /= node->Count(); return Status::OK(); } // Hooks up any identified eoe nodes under this repeat. Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); + CHECK_FAIL_RETURN_UNEXPECTED(node->Count() != 0, "Invalid data, the number of node can't be 0."); node->SetTotalRepeats(num_repeats_); node->SetNumEpochs(num_epochs_); // We finish the walk of this EpochCtrl's descendent nodes. @@ -119,6 +133,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const m // All operators have a flag that might be set related to the repeat and any leaf nodes need to be set up // for use with a controlling repeat above it. Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); // If we are under a cache op, then save ourselves to the cached op stack. if (is_cached_) { AddToCachedNodeStack(node); @@ -132,6 +148,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const mod #ifndef ENABLE_ANDROID // CacheOp removes previous leaf ops and replaces them with itself Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); is_cached_ = false; // if we are a cache within a repeat path of the tree, then adjust the total repeats and total epochs for cached ops. @@ -153,6 +171,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modif // Turns off the tracking for operations under merge op Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); // If there was not any repeat in the merge cache miss leg, then the cache_lookup // would not have been consumed yet. In that case, we need to set its total repeats for it. if (cache_lookup_) { @@ -168,6 +188,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const // Saves the lookup up in case it needs to be referenced by a repeat Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); if (!node->IsLeaf()) { // By definition, the CacheLookup must be a leaf op. Make that clear here. 
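// A worked example of the repeat-count bookkeeping that the new Count() != 0 checks protect
// (illustrative numbers, not taken from the source): with num_epochs_ = 2, a Repeat(3) node
// multiplies num_repeats_ from 2 to 6 during its pre-visit, so every op underneath is marked
// to produce data 6 times in total; the matching VisitAfter divides by node->Count() to
// restore 2 for the ops above the repeat:
//
//   num_repeats_ *= node->Count();   // pre-visit:  2 * 3 == 6
//   // ... descend into the repeated subtree ...
//   num_repeats_ /= node->Count();   // post-visit: 6 / 3 == 2
//
// An integer division by a zero Count() would be undefined behaviour, which is exactly what
// the added CHECK_FAIL_RETURN_UNEXPECTED calls rule out before dividing.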
RETURN_STATUS_UNEXPECTED("CacheLookupOp must be a leaf node!"); @@ -185,6 +207,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const #endif Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); // Set total repeats and total epochs for the TransferNode node->SetTotalRepeats(num_epochs_); node->SetNumEpochs(num_epochs_); @@ -192,7 +216,12 @@ Status RepeatPass::VisitAfter(std::shared_ptr node, bool *const mo } // Adds an operator to the cached operator stack save area -void RepeatPass::AddToCachedNodeStack(const std::shared_ptr &node) { cached_node_stacks_.push(node); } +void RepeatPass::AddToCachedNodeStack(const std::shared_ptr &node) { + if (node == nullptr) { + return; + } + cached_node_stacks_.push(node); +} // Pops an operator from the cached operator stack save area std::shared_ptr RepeatPass::PopFromCachedNodeStack() { diff --git a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/epoch_ctrl_pass.cc b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/epoch_ctrl_pass.cc index 302d84e6a79..082557c2ae2 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/opt/pre/epoch_ctrl_pass.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/opt/pre/epoch_ctrl_pass.cc @@ -29,6 +29,10 @@ EpochCtrlPass::InjectionFinder::InjectionFinder(std::shared_ptr nod // Performs finder work for BuildVocabOp that has special rules about epoch control injection Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); + CHECK_FAIL_RETURN_UNEXPECTED(node->Children().size() > 0, + "Invalid data, the node of child should greater than zero."); // The injection is at the child of the root node injection_point_ = node->Children()[0]; num_epochs_ = node->num_epochs(); @@ -37,6 +41,8 @@ Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr node, boo // Performs finder work for BuildVocabOp that has special rules about epoch control injection Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); injection_point_ = nullptr; return Status::OK(); } @@ -44,12 +50,18 @@ Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr nod #ifndef ENABLE_ANDROID // Performs finder work for BuildSentencePieceVocabNode that has special rules about epoch control injection Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); injection_point_ = nullptr; return Status::OK(); } #endif Status EpochCtrlPass::InjectionFinder::VisitAfter(std::shared_ptr node, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(modified); + CHECK_FAIL_RETURN_UNEXPECTED(node->Children().size() > 0, + "Invalid data, the node of child should greater than zero."); // Assumption: There is only one TransferNode in a pipeline. This assumption is not validated here. // Move the injection point to the child of this node. 
injection_point_ = node->Children()[0]; @@ -61,6 +73,8 @@ EpochCtrlPass::EpochCtrlPass() {} // Runs an injection pass to inject in operators needed at the pre pass stage Status EpochCtrlPass::RunOnTree(std::shared_ptr root_ir, bool *const modified) { + RETURN_UNEXPECTED_IF_NULL(root_ir); + RETURN_UNEXPECTED_IF_NULL(modified); MS_LOG(INFO) << "Pre pass: Injection pass started."; // First, run the finder to perform any injection info before we can go ahead to drive the op injection work. diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc index 14baf948932..b5108f8d804 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_size.cc @@ -53,8 +53,8 @@ json ConnectorSize::ParseOpInfo(const DatasetOp &node, const std::vector children_id; - std::transform(children.begin(), children.end(), std::back_inserter(children_id), - [](std::shared_ptr op) -> int32_t { return op->id(); }); + (void)std::transform(children.begin(), children.end(), std::back_inserter(children_id), + [](const std::shared_ptr &op) -> int32_t { return op->id(); }); if (!children_id.empty()) { json_node["children"] = children_id; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc index acd80290486..e685b660b78 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/connector_throughput.cc @@ -29,6 +29,9 @@ namespace dataset { // temporary helper int ConnectorThroughput::InitNodes() { + if (tree_ == nullptr) { + return 0; + } auto it = (*tree_).begin(); return it.NumNodes(); } @@ -43,15 +46,16 @@ Status ConnectorThroughput::Sample() { out_row_count_row[col] = cur_out_rows_count; auto sz = timestamps_.size(); cur_time = std::chrono::steady_clock::now(); - double dt = 0; + double data_time = 0; if (sz > 1) { - auto _dt = std::chrono::duration_cast(timestamps_[0][sz - 1] - timestamps_[0][sz - 2]); - dt = std::chrono::duration(_dt).count(); + auto full_time = + std::chrono::duration_cast(timestamps_[0][sz - 1] - timestamps_[0][sz - 2]); + data_time = std::chrono::duration(full_time).count(); } auto prev_out_rows_count = out_row_count_table_[col][out_row_count_table_.size() - 1]; - if (dt != 0) { + if (data_time != 0) { const int32_t multiplier = 1000; - auto thr = (cur_out_rows_count - prev_out_rows_count) / (multiplier * dt); + auto thr = (cur_out_rows_count - prev_out_rows_count) / (multiplier * data_time); throughput_row[col] = thr; } else { throughput_row[col] = 0; @@ -70,7 +74,7 @@ json ConnectorThroughput::ParseOpInfo(const DatasetOp &node, const std::vector children_id; std::transform(children.begin(), children.end(), std::back_inserter(children_id), - [](std::shared_ptr op) -> int32_t { return op->id(); }); + [](const std::shared_ptr &op) -> int32_t { return op ? 
op->id() : 0; }); json json_node; json_node["op_id"] = node.id(); json_node["op_type"] = node.Name(); @@ -100,8 +104,10 @@ Status ConnectorThroughput::SaveToFile() { int col = 0; for (auto &node : *tree_) { std::vector throughput; - for (auto i = 0; i < throughput_.size(); i++) { - throughput.push_back(throughput_[col][i]); + if (throughput_.size() > col) { + for (auto i = 0; i < throughput_[col].size(); i++) { + throughput.push_back(throughput_[col][i]); + } } if (!path.Exists()) { diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.cc index 5e5c14d11a1..066450848f3 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.cc @@ -18,9 +18,9 @@ #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) #include #endif -#include #include #include +#include #include #include #include @@ -33,8 +33,8 @@ using json = nlohmann::json; namespace mindspore { namespace dataset { -bool BaseCpu::fetched_all_process_shared = false; -std::unordered_map> BaseCpu::op_process_shared = {}; +bool BaseCpu::fetched_all_process_shared_ = false; +std::unordered_map> BaseCpu::op_process_shared_ = {}; #if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__) #define USING_LINUX @@ -46,8 +46,8 @@ BaseCpu::BaseCpu() { pre_cpu_stat_.io_stat_ = 0; pre_cpu_stat_.idle_stat_ = 0; pre_cpu_stat_.total_stat_ = 0; - fetched_all_process = false; - pre_fetched_state = false; + fetched_all_process_ = false; + pre_fetched_state_ = false; cpu_processor_num_ = 0; } @@ -157,6 +157,7 @@ Status DeviceCpu::Collect(const ExecutionTree *tree) { return Status::OK(); } Status DeviceCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { + RETURN_UNEXPECTED_IF_NULL(name); name->clear(); name->append("device_info"); int total_samples = cpu_util_.size(); @@ -221,6 +222,7 @@ Status DeviceCpu::SaveToFile(const std::string &file_path) { Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id, std::unordered_map> *op_stat) { + RETURN_UNEXPECTED_IF_NULL(op_stat); pid_t pid = 0; #if defined(USING_LINUX) pid = syscall(SYS_getpid); @@ -257,11 +259,12 @@ Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id, } Status OperatorCpu::Collect(const ExecutionTree *tree) { + RETURN_UNEXPECTED_IF_NULL(tree); if (first_collect_) { for (auto iter = tree->begin(); iter != tree->end(); ++iter) { id_count_++; - op_name[iter->id()] = iter->NameWithID(); - op_parallel_workers[iter->id()] = iter->num_workers(); + op_name_[iter->id()] = iter->NameWithID(); + op_parallel_workers_[iter->id()] = iter->num_workers(); } #if defined(USING_LINUX) cpu_processor_num_ = get_nprocs_conf(); @@ -269,34 +272,34 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { } // Obtain the op and thread mapping - op_thread.clear(); + op_thread_.clear(); List allTasks = tree->AllTasks()->GetTask(); for (auto &task1 : allTasks) { int32_t op_id = task1.get_operator_id(); - op_thread[op_id].emplace_back(task1.get_linux_id()); + op_thread_[op_id].emplace_back(task1.get_linux_id()); } // add process id into op_thread - if (!fetched_all_process) { + if (!fetched_all_process_) { { py::gil_scoped_acquire gil_acquire; py::module ds = py::module::import("mindspore.dataset.engine.datasets"); py::tuple process_info = ds.attr("_get_operator_process")(); py::dict sub_process = 
py::reinterpret_borrow(process_info[0]); - fetched_all_process = py::reinterpret_borrow(process_info[1]); + fetched_all_process_ = py::reinterpret_borrow(process_info[1]); // parse dict value - op_process = toIntMap(sub_process); - BaseCpu::op_process_shared = op_process; - BaseCpu::fetched_all_process_shared = fetched_all_process; + op_process_ = toIntMap(sub_process); + BaseCpu::op_process_shared_ = op_process_; + BaseCpu::fetched_all_process_shared_ = fetched_all_process_; } // judge whether there is device_que operator, if so operator id may need increase by one, temp use directly - for (auto item : op_process) { + for (auto item : op_process_) { if (!item.second.empty()) { - if (op_thread.find(item.first) != op_thread.end()) { - op_thread[item.first].insert(op_thread[item.first].end(), item.second.begin(), item.second.end()); + if (op_thread_.find(item.first) != op_thread_.end()) { + op_thread_[item.first].insert(op_thread_[item.first].end(), item.second.begin(), item.second.end()); } else { - op_thread[item.first] = item.second; + op_thread_[item.first] = item.second; } } } @@ -310,16 +313,15 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { if (!first_collect_) { // obtain all the op id in current tasks std::vector total_op_id; - for (auto iter = op_thread.begin(); iter != op_thread.end(); iter++) { - total_op_id.emplace_back(iter->first); - } + (void)std::transform(op_thread_.begin(), op_thread_.end(), std::back_inserter(total_op_id), + [](const auto &iter) { return iter.first; }); // iter all the op, and obtain the CPU utilization of each operator for (auto op_id = -1; op_id < id_count_; op_id++) { float user_util = 0, sys_util = 0; auto iter = std::find(total_op_id.begin(), total_op_id.end(), op_id); if (iter != total_op_id.end()) { - for (auto thread_id : op_thread[op_id]) { + for (auto thread_id : op_thread_[op_id]) { if (ParseCpuInfo(op_id, thread_id, &op_stat_) == Status::OK()) { user_util += (op_stat_[op_id][thread_id].user_stat_ - pre_op_stat_[op_id][thread_id].user_stat_) * 1.0 / (total_stat_ - pre_total_stat_) * 100; @@ -329,7 +331,7 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { } } CpuOpUtil info; - info.op_id = op_id; + info.op_id_ = op_id; info.sys_utilization_ = sys_util; info.user_utilization_ = user_util; cpu_step_util_.emplace_back(info); @@ -337,10 +339,10 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { cpu_op_util_.emplace_back(cpu_step_util_); } else { // mainly obtain the init CPU execute time in first collect - for (auto iter = op_thread.begin(); iter != op_thread.end(); iter++) { - int32_t op_id = iter->first; - for (auto thread_id_ : iter->second) { - // ignore errors in the first collect + for (const auto &iter : op_thread_) { + int32_t op_id = iter.first; + for (auto thread_id_ : iter.second) { + // ParseCpuInfo may execute failed for cpu data not ready, but we still get next thread cpu info (void)ParseCpuInfo(op_id, thread_id_, &op_stat_); } } @@ -355,6 +357,8 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) { } Status OperatorCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { + RETURN_UNEXPECTED_IF_NULL(name); + RETURN_UNEXPECTED_IF_NULL(extra_message); int total_samples = cpu_op_util_.size(); // Only analyze the middle half of the samples @@ -374,15 +378,15 @@ Status OperatorCpu::Analyze(std::string *name, double *utilization, std::string sum += cpu_op_util_[i][index].sys_utilization_; } if ((end_analyze - start_analyze) > 0) { - op_util = 1.0 * sum * cpu_processor_num_ / 
(op_parallel_workers[op_id] * (end_analyze - start_analyze)); + op_util = 1.0 * sum * cpu_processor_num_ / (op_parallel_workers_[op_id] * (end_analyze - start_analyze)); } if (op_util > *utilization) { *utilization = op_util; name->clear(); - name->append(op_name[op_id]); + (void)name->append(op_name_[op_id]); } - extra_message->append(op_name[op_id] + " utiliization per thread: " + std::to_string(op_util) + "% (" + - std::to_string(op_parallel_workers[op_id]) + " parallel_workers); "); + (void)extra_message->append(op_name_[op_id] + " utilization per thread: " + std::to_string(op_util) + "% (" + + std::to_string(op_parallel_workers_[op_id]) + " parallel_workers); "); } return Status::OK(); } @@ -428,24 +432,24 @@ Status ProcessCpu::ParseCpuInfo() { uint64_t total_stat_; RETURN_IF_NOT_OK(GetTotalCpuTime(&total_stat_)); - if (!pre_fetched_state) { - process_id.clear(); + if (!pre_fetched_state_) { + process_id_.clear(); pid_t main_pid = 0; #if defined(USING_LINUX) main_pid = syscall(SYS_getpid); #endif - process_id.emplace_back(main_pid); - op_process = BaseCpu::op_process_shared; - fetched_all_process = BaseCpu::fetched_all_process_shared; - for (auto item : op_process) { - for (auto id : item.second) { - process_id.emplace_back(id); + process_id_.emplace_back(main_pid); + op_process_ = BaseCpu::op_process_shared_; + fetched_all_process_ = BaseCpu::fetched_all_process_shared_; + for (const auto &item : op_process_) { + for (const auto &id : item.second) { + process_id_.emplace_back(id); } } } float user_util = 0, sys_util = 0; - for (auto pid : process_id) { + for (const auto &pid : process_id_) { std::string stat_path = "/proc/" + std::to_string(pid) + "/stat"; std::ifstream file(stat_path); @@ -479,11 +483,12 @@ Status ProcessCpu::ParseCpuInfo() { } pre_total_stat_ = total_stat_; first_collect_ = false; - pre_fetched_state = fetched_all_process; + pre_fetched_state_ = fetched_all_process_; return Status::OK(); } Status ProcessCpu::Collect(const ExecutionTree *tree) { + RETURN_UNEXPECTED_IF_NULL(tree); if (first_collect_) { #if defined(USING_LINUX) cpu_processor_num_ = get_nprocs_conf(); @@ -495,6 +500,9 @@ Status ProcessCpu::Collect(const ExecutionTree *tree) { } Status ProcessCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) { + RETURN_UNEXPECTED_IF_NULL(name); + RETURN_UNEXPECTED_IF_NULL(utilization); + RETURN_UNEXPECTED_IF_NULL(extra_message); name->clear(); name->append("process_info"); int total_samples = process_util_.size(); diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.h b/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.h index 5d12e1a3b87..59ba22e020a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/cpu_sampling.h @@ -49,7 +49,7 @@ typedef struct CpuInfo_s { typedef struct CpuOpInfo_s { float user_utilization_; float sys_utilization_; - int32_t op_id; + int32_t op_id_; } CpuOpUtil; // CPU utilization of process @@ -78,11 +78,11 @@ class BaseCpu { protected: std::vector cpu_util_; CpuStat pre_cpu_stat_; - static bool fetched_all_process_shared; - static std::unordered_map> op_process_shared; - bool fetched_all_process; - bool pre_fetched_state; - std::unordered_map> op_process; + static bool fetched_all_process_shared_; + static std::unordered_map> op_process_shared_; + bool fetched_all_process_; + bool pre_fetched_state_; + std::unordered_map> op_process_; int32_t cpu_processor_num_; }; @@ -136,9 +136,9 @@ class OperatorCpu : public BaseCpu 
{ bool first_collect_; // Store the id and its corresponding threads. - std::unordered_map> op_thread; - std::unordered_map op_name; - std::unordered_map op_parallel_workers; + std::unordered_map> op_thread_; + std::unordered_map op_name_; + std::unordered_map op_parallel_workers_; std::unordered_map> pre_op_stat_; uint64_t pre_total_stat_; int32_t id_count_; @@ -161,7 +161,7 @@ class ProcessCpu : public BaseCpu { std::vector process_util_; uint64_t pre_total_stat_; std::unordered_map pre_process_stat_; - std::vector process_id; + std::vector process_id_; }; // Sampling CPU information diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h b/mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h index 538b84f3468..2a251057236 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/perf_data.h @@ -52,7 +52,9 @@ class PerfData { void AddSample(const T &row) { auto i = 0; for (const auto &e : row) { - data_[i++].push_back(e); + if (data_.size() > i) { + data_[i++].push_back(e); + } } counter_++; } @@ -62,7 +64,9 @@ class PerfData { auto Row(dsize_t idx) { std::vector row(n_cols_); for (auto i = 0; i < n_cols_; i++) { - row[i] = data_[i][idx]; + if (data_.size() > i && data_[i].size() > idx) { + row[i] = data_[i][idx]; + } } return row; } diff --git a/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc index 6d6b3645d2a..3be230ea4af 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/perf/profiling.cc @@ -51,6 +51,7 @@ Status Tracing::SaveToFile() { } Status Sampling::ReadJson(nlohmann::json *output) { + RETURN_UNEXPECTED_IF_NULL(output); Path path = Path(file_path_); if (path.Exists()) { MS_LOG(DEBUG) << file_path_ << " exists"; diff --git a/mindspore/ccsrc/minddata/dataset/engine/serdes.cc b/mindspore/ccsrc/minddata/dataset/engine/serdes.cc index e8bd4d22655..243a4860050 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/serdes.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/serdes.cc @@ -25,6 +25,8 @@ std::map node, const std::string &filename, nlohmann::json *out_json) { + RETURN_UNEXPECTED_IF_NULL(node); + RETURN_UNEXPECTED_IF_NULL(out_json); // Dump attributes of current node to json string nlohmann::json args; RETURN_IF_NOT_OK(node->to_json(&args)); diff --git a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc index ee2900cb72f..18171c5bb20 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter.cc @@ -48,6 +48,7 @@ TreeAdapter::TreeAdapter(UsageFlag usage) : usage_(usage), launched_(false), tre } Status TreeAdapter::PrePass(std::shared_ptr ir) { + RETURN_UNEXPECTED_IF_NULL(ir); // Vector of actions in pre-pass phase std::vector> actions; @@ -73,6 +74,7 @@ Status TreeAdapter::PrePass(std::shared_ptr ir) { } Status TreeAdapter::Optimize(std::shared_ptr ir) { + RETURN_UNEXPECTED_IF_NULL(ir); // Vector of optimizations std::vector> optimizations; MS_LOG(INFO) << "Running optimization pass loops"; @@ -89,6 +91,7 @@ Status TreeAdapter::Optimize(std::shared_ptr ir) { } Status TreeAdapter::PostPass(std::shared_ptr ir) { + RETURN_UNEXPECTED_IF_NULL(ir); // Vector of actions in post-pass phase std::vector> actions; MS_LOG(INFO) << "Running post pass loops."; @@ -118,6 +121,9 @@ Status TreeAdapter::PostPass(std::shared_ptr ir) { } Status 
TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr ir, std::shared_ptr *const op) { + RETURN_UNEXPECTED_IF_NULL(ir); + RETURN_UNEXPECTED_IF_NULL(op); + RETURN_UNEXPECTED_IF_NULL(tree_); // Build the DatasetOp ExecutionTree from the optimized IR tree std::vector> ops; RETURN_IF_NOT_OK(ir->Build(&ops)); @@ -133,7 +139,7 @@ Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr ir, std } // Build the children of IR, once they return, add the return value to *op - for (std::shared_ptr child_ir : ir->Children()) { + for (const std::shared_ptr &child_ir : ir->Children()) { std::shared_ptr child_op; RETURN_IF_NOT_OK(BuildExecutionTreeRecur(child_ir, &child_op)); RETURN_IF_NOT_OK(ops.back()->AddChild(child_op)); // append children to the last of ops @@ -143,6 +149,7 @@ Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr ir, std } Status TreeAdapter::Build(std::shared_ptr root_ir) { + RETURN_UNEXPECTED_IF_NULL(root_ir); // This will evolve in the long run tree_ = std::make_unique(); // disable profiling if this is only a getter pass diff --git a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter_lite.cc b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter_lite.cc index fb9b39a621e..a6817a9ee3a 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/tree_adapter_lite.cc +++ b/mindspore/ccsrc/minddata/dataset/engine/tree_adapter_lite.cc @@ -22,6 +22,8 @@ namespace dataset { TreeAdapterLite::TreeAdapterLite() : root_(nullptr) { tree_ = std::make_unique(); } Status TreeAdapterLite::BuildExecutionTreeRecur(std::shared_ptr ir, std::shared_ptr *const op) { + RETURN_UNEXPECTED_IF_NULL(ir); + RETURN_UNEXPECTED_IF_NULL(op); // Build the DatasetOp ExecutionTree from the optimized IR tree std::vector> ops; RETURN_IF_NOT_OK(ir->Build(&ops)); @@ -41,7 +43,7 @@ Status TreeAdapterLite::BuildExecutionTreeRecur(std::shared_ptr ir, } // Build the children of IR, once they return, add the return value to *op - for (std::shared_ptr child_ir : ir->Children()) { + for (const std::shared_ptr &child_ir : ir->Children()) { std::shared_ptr child_op; RETURN_IF_NOT_OK(BuildExecutionTreeRecur(child_ir, &child_op)); RETURN_IF_NOT_OK(ops.back()->AddChild(child_op)); // append children to the last of ops @@ -60,6 +62,7 @@ Status TreeAdapterLite::BuildTree(std::shared_ptr root_ir) { Status TreeAdapterLite::GetNextRow(TensorRow *const row) { RETURN_UNEXPECTED_IF_NULL(root_); RETURN_IF_NOT_OK(root_->GetNextRowPullMode(row)); + RETURN_UNEXPECTED_IF_NULL(row); return Status::OK(); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/dvpp_normalize_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/dvpp_normalize_op.h index 7e4dbe09bb5..8e1264bb817 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/dvpp_normalize_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/dvpp_normalize_op.h @@ -19,6 +19,7 @@ #include #include +#include #include #include "minddata/dataset/core/device_tensor.h" #include "minddata/dataset/core/device_resource.h" @@ -30,7 +31,8 @@ namespace mindspore { namespace dataset { class DvppNormalizeOp : public TensorOp { public: - explicit DvppNormalizeOp(std::vector mean, std::vector std) : mean_(mean), std_(std) {} + explicit DvppNormalizeOp(std::vector mean, std::vector std) + : mean_(std::move(mean)), std_(std::move(std)) {} ~DvppNormalizeOp() = default; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h index cf898815a72..bd5026b972a 100644 
--- a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h @@ -18,7 +18,7 @@ #ifndef ENABLE_DVPP_INTERFACE #define ENABLE_DVPP_INTERFACE #endif -#include +#include #include #include #include diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.cc index 55886fcdf80..852eb98ec36 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.cc @@ -13,13 +13,14 @@ * limitations under the License. */ +#include "minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h" + +#include +#include +#include #include "minddata/dataset/include/dataset/constants.h" #include "minddata/dataset/core/tensor_shape.h" #include "minddata/dataset/kernels/image/image_utils.h" -#include "MDAclProcess.h" -#include -#include -#include namespace { const int BUFFER_SIZE = 2048; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h index 41b790ef938..cd162823f7b 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h @@ -17,25 +17,25 @@ #define MDACLMANAGER_H #include -#include -#include +#include #include #include #include +#include +#include +#include +#include #include "acl/acl.h" -#include "CommonDataType.h" + #include "minddata/dataset/core/tensor_shape.h" #include "minddata/dataset/core/data_type.h" +#include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h" +#include "minddata/dataset/kernels/image/dvpp/utils/DvppCommon.h" +#include "minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h" #include "mindspore/ccsrc/minddata/dataset/core/device_tensor.h" #include "mindspore/ccsrc/minddata/dataset/core/tensor.h" #include "mindspore/core/utils/log_adapter.h" #include "mindspore/ccsrc/minddata/dataset/util/status.h" -#include "ErrorCode.h" -#include "DvppCommon.h" -#include -#include -#include -#include mode_t SetFileDefaultUmask(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/ResourceManager.h b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/ResourceManager.h index ff5f29099f2..daed1f9faed 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/ResourceManager.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/dvpp/utils/ResourceManager.h @@ -16,17 +16,18 @@ #ifndef RESOURCEMANAGER_H #define RESOURCEMANAGER_H -#include -#include +#include #include #include -#include #include -#include "CommonDataType.h" -#include "ErrorCode.h" +#include #include +#include +#include #include "mindspore/core/utils/log_adapter.h" #include "mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.h" +#include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h" +#include "minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h" enum ModelLoadMethod { LOAD_FROM_FILE = 0, // Loading from file, memory of model and weights are managed by ACL diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/canny.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/canny.cc index 96e4c89e1a4..0bde0e63216 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/canny.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/canny.cc @@ 
-48,7 +48,7 @@ static void GetSobelKernel(float *kernel, int flag, int ksize, double scale) { buffer[0] = 1, buffer[1] = -2, buffer[2] = 1; } } else { - int old, now; + float old, now; buffer[0] = 1; for (int i = 0; i < ksize; i++) { buffer[i + 1] = 0; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc index 57eaecbb6a1..04549c9638e 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/image_process.cc @@ -571,9 +571,8 @@ bool ConvertTo(const LiteMat &src, LiteMat &dst, double scale) { if (dst.IsEmpty()) { dst.Init(src.width_, src.height_, src.channel_, LDataType::FLOAT32); - } else if (src.width_ != dst.width_ || src.height_ != dst.height_ || src.channel_ != dst.channel_) { - return false; - } else if (dst.data_type_ != LDataType::FLOAT32) { + } else if (src.width_ != dst.width_ || src.height_ != dst.height_ || src.channel_ != dst.channel_ || + dst.data_type_ != LDataType::FLOAT32) { return false; } @@ -662,24 +661,16 @@ bool Crop(const LiteMat &src, LiteMat &dst, int x, int y, int w, int h) { } static bool CheckZero(const std::vector &vs) { - for (int i = 0; i < vs.size(); i++) { - if (Equal(vs[i], 0.0f)) { - return true; - } - } - return false; + return std::any_of(vs.begin(), vs.end(), [](const float &v) { return Equal(v, 0.0f); }); } static bool CheckZero(const std::vector &vs) { - for (int i = 0; i < vs.size(); i++) { - if (vs[i] == 0) return true; - } - return false; + return std::any_of(vs.begin(), vs.end(), [](const float &v) { return v == 0; }); } static bool CheckMeanAndStd(const LiteMat &src, LiteMat &dst, int channel, const std::vector &mean, const std::vector &std) { - if (mean.size() == 0 && std.size() == 0) { + if (mean.empty() && std.empty()) { return false; } if (src.data_type_ != LDataType::FLOAT32) { @@ -935,8 +926,8 @@ bool Merge(const std::vector &mv, LiteMat &dst) { LDataType data_type = mv[0].data_type_; // The arrays in list must be single-channel - for (int i = 0; i < mv.size(); i++) { - if (mv[i].channel_ != 1) return false; + if (std::any_of(mv.begin(), mv.end(), [](const LiteMat &m) { return m.channel_ != 1; })) { + return false; } for (int i = 1; i < mv.size(); i++) { @@ -998,7 +989,7 @@ bool Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int ri return true; } -std::vector> GetDefaultBoxes(BoxesConfig config) { +std::vector> GetDefaultBoxes(const BoxesConfig config) { size_t size = config.num_default.size(); if (size <= 1 || config.feature_size.size() != size || config.steps.size() != size || config.aspect_rations.size() != size) { @@ -1116,6 +1107,7 @@ std::vector ApplyNms(const std::vector> &all_boxes, std: } } std::vector new_order; + new_order.reserve(inds.size()); for (int k = 0; k < inds.size(); k++) { new_order.push_back(order[inds[k]]); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.cc index f380f34de9a..5d17bc4f51b 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.cc @@ -283,9 +283,7 @@ void LiteMat::Release() { if (data_ptr_) { AlignFree(data_ptr_); } - if (ref_count_) { - delete[] ref_count_; - } + delete[] ref_count_; } data_ptr_ = nullptr; elem_size_ = 0; @@ -293,7 +291,7 @@ void LiteMat::Release() { height_ = 0; channel_ = 0; 
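// The Release() cleanup above leans on a language guarantee: delete[] applied to a null
// pointer is a no-op, so the previous if (ref_count_) wrapper was redundant. A minimal
// illustration (hypothetical variable, not from this file):
//
//   int *ref_count = nullptr;
//   delete[] ref_count;      // well-defined: does nothing
//   ref_count = new int[1];
//   delete[] ref_count;      // releases the allocation
//   ref_count = nullptr;     // reset, so a second Release() stays harmless
//
// Assigning nullptr rather than the literal 0 to the member afterwards also states the
// intent of a pointer field more clearly.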
c_step_ = 0; - ref_count_ = 0; + ref_count_ = nullptr; size_ = 0; setSteps(0, 0, 0); } @@ -418,7 +416,7 @@ inline void SubtractImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *d } inline bool CheckSubstract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { - if (dst == NULL) { + if (dst == nullptr) { return false; } @@ -426,10 +424,7 @@ inline bool CheckSubstract(const LiteMat &src_a, const LiteMat &src_b, LiteMat * return false; } - if (src_a.data_type_ != src_b.data_type_) { - return false; - } - return true; + return src_a.data_type_ == src_b.data_type_; } bool Subtract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { @@ -585,7 +580,7 @@ inline void DivideImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *dst } inline bool CheckDivide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { - if (dst == NULL) { + if (dst == nullptr) { return false; } @@ -593,10 +588,7 @@ inline bool CheckDivide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst return false; } - if (src_a.data_type_ != src_b.data_type_) { - return false; - } - return true; + return src_a.data_type_ == src_b.data_type_; } bool Divide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { @@ -693,7 +685,7 @@ inline void MultiplyImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *d } inline bool CheckMultiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { - if (dst == NULL) { + if (dst == nullptr) { return false; } @@ -701,10 +693,7 @@ inline bool CheckMultiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *d return false; } - if (src_a.data_type_ != src_b.data_type_) { - return false; - } - return true; + return src_a.data_type_ == src_b.data_type_; } bool Multiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) { diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h index d483ca9847b..db43b464399 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/lite_mat.h @@ -166,15 +166,9 @@ class LDataType { ~LDataType() = default; inline Type Value() const { return type_; } - inline bool operator==(const LDataType &ps) const { - if (this->type_ == ps.type_) return true; - return false; - } + inline bool operator==(const LDataType &ps) const { return this->type_ == ps.type_; } - inline bool operator!=(const LDataType &ps) const { - if (this->type_ != ps.type_) return true; - return false; - } + inline bool operator!=(const LDataType &ps) const { return this->type_ != ps.type_; } uint8_t SizeInBytes() const { if (type_ < LDataType::NUM_OF_TYPES) diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc index 2ec3fb0fed3..f8729a99fd5 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_cv/warp_affine.cc @@ -381,11 +381,9 @@ bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int } if (dst.IsEmpty()) { (void)dst.Init(dst_w, dst_h, src.channel_, LDataType::UINT8); - } else if (dst.height_ != dst_h || dst.width_ != dst_w || dst.channel_ != src.channel_) { + } else if (dst.height_ != dst_h || dst.width_ != dst_w || dst.channel_ != src.channel_ || + dst.data_type_ != LDataType::UINT8) { return false; - } else if (dst.data_type_ != LDataType::UINT8) { - return false; - } 
else { } double IM[6]; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc index 7fa5853db78..2c94e1447b8 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/lite_image_utils.cc @@ -182,6 +182,8 @@ Status JpegCropAndDecode(const std::shared_ptr &input, std::shared_ptr::max() - crop_w) > crop_x, "invalid crop width"); + CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits::max() - crop_h) > crop_y, "invalid crop height"); if (crop_x == 0 && crop_y == 0 && crop_w == 0 && crop_h == 0) { crop_w = cinfo.output_width; crop_h = cinfo.output_height; @@ -190,6 +192,7 @@ Status JpegCropAndDecode(const std::shared_ptr &input, std::shared_ptr &input, std::shared_ptrSizeInBytes(); JSAMPLE *buffer = reinterpret_cast(&(*output_tensor->begin())); + // stride refers to output tensor, which has 3 components at most + CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits::max() - skipped_scanlines) > crop_h, + "Invalid crop height."); const int max_scanlines_to_read = skipped_scanlines + crop_h; // stride refers to output tensor, which has 3 components at most + CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits::max() / crop_w) > kOutNumComponents, + "Invalid crop width."); const int stride = crop_w * kOutNumComponents; // offset is calculated for scanlines read from the image, therefore // has the same number of components as the image @@ -246,6 +254,8 @@ Status Crop(const std::shared_ptr &input, std::shared_ptr *outpu RETURN_STATUS_UNEXPECTED("Crop: image datatype is not float32 or uint8"); } + CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits::max() - y) > h, "Invalid crop height."); + CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits::max() - x) > w, "Invalid crop width."); // account for integer overflow if (y < 0 || (y + h) > input->shape()[0] || (y + h) < 0) { RETURN_STATUS_UNEXPECTED( @@ -410,7 +420,10 @@ Status Resize(const std::shared_ptr &input, std::shared_ptr *out Status ResizePreserve(const TensorRow &inputs, int32_t height, int32_t width, int32_t img_orientation, TensorRow *outputs) { outputs->resize(3); + CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() > 0, + "Invalid input, should greater than 0, but got " + std::to_string(inputs.size())); std::shared_ptr input = inputs[0]; + CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 3, "Invalid input shape, should be greater than 3 dimensions."); LiteMat lite_mat_src(input->shape()[1], input->shape()[0], input->shape()[2], const_cast(reinterpret_cast(input->GetBuffer())), GetLiteCVDataType(input->type())); @@ -537,7 +550,15 @@ Status Pad(const std::shared_ptr &input, std::shared_ptr *output std::shared_ptr output_tensor; + CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits::max() - lite_mat_rgb.width_) > pad_left, + "Invalid pad width."); + CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits::max() - lite_mat_rgb.width_ + pad_left) > pad_right, + "Invalid pad width."); int pad_width = lite_mat_rgb.width_ + pad_left + pad_right; + CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits::max() - lite_mat_rgb.height_) > pad_top, + "Invalid pad height."); + CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits::max() - lite_mat_rgb.height_ + pad_top) > pad_bottom, + "Invalid pad height."); int pad_height = lite_mat_rgb.height_ + pad_top + pad_bottom; TensorShape new_shape = TensorShape({pad_height, pad_width, input->shape()[2]}); RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), 
&output_tensor)); @@ -721,11 +742,13 @@ Status Affine(const std::shared_ptr &input, std::shared_ptr *out } int height = 0; int width = 0; + CHECK_FAIL_RETURN_UNEXPECTED(mat.size() <= 6, "Invalid mat shape."); double M[6] = {}; for (int i = 0; i < mat.size(); i++) { M[i] = static_cast(mat[i]); } + CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 3, "Invalid input shape, should be at least 3 dimensions."); LiteMat lite_mat_rgb(input->shape()[1], input->shape()[0], input->shape()[2], const_cast(reinterpret_cast(input->GetBuffer())), GetLiteCVDataType(input->type())); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.cc index 8e09463a35a..2cd13e1ab62 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.cc @@ -22,7 +22,7 @@ namespace mindspore { namespace dataset { -const int32_t ResizePreserveAROp::kDefImgorientation = 0; +const int32_t ResizePreserveAROp::kDefImgOrientation = 0; ResizePreserveAROp::ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation) : height_(height), width_(width), img_orientation_(img_orientation) {} diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.h index d473c80c351..67ca8dbc2b1 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_preserve_ar_op.h @@ -34,9 +34,9 @@ namespace dataset { class ResizePreserveAROp : public TensorOp { public: // Default values, also used by python_bindings.cc - static const int32_t kDefImgorientation; + static const int32_t kDefImgOrientation; - ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation = kDefImgorientation); + ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation = kDefImgOrientation); ~ResizePreserveAROp() override = default; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc index b2ea0aeb14c..0d5fe7ecc98 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/resize_with_bbox_op.cc @@ -35,9 +35,9 @@ Status ResizeWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) { int32_t input_w = input[0]->shape()[1]; output->resize(2); - (*output)[1] = std::move(input[1]); // move boxes over to output + (*output)[1] = input[1]; // copy boxes over to output - std::shared_ptr input_cv = CVTensor::AsCVTensor(std::move(input[0])); + std::shared_ptr input_cv = CVTensor::AsCVTensor(input[0]); RETURN_IF_NOT_OK(ResizeOp::Compute(std::static_pointer_cast(input_cv), &(*output)[0])); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_bgr_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_bgr_op.h index 0502de73a78..77f215062d3 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_bgr_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_bgr_op.h @@ -29,7 +29,7 @@ namespace mindspore { namespace dataset { class RgbaToBgrOp : public TensorOp { public: - RgbaToBgrOp() {} + RgbaToBgrOp() = default; ~RgbaToBgrOp() override = default; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_rgb_op.h
b/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_rgb_op.h index 602dd4abd3f..deed2513e6f 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_rgb_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/rgba_to_rgb_op.h @@ -29,7 +29,7 @@ namespace mindspore { namespace dataset { class RgbaToRgbOp : public TensorOp { public: - RgbaToRgbOp() {} + RgbaToRgbOp() = default; ~RgbaToRgbOp() override = default; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc index b24359089ac..c7609601c66 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/sharpness_op.cc @@ -42,9 +42,10 @@ Status SharpnessOp::Compute(const std::shared_ptr &input, std::shared_pt /// 1, 5, 1, /// 1, 1, 1 - float filterSum = 13.0; + const float filterMid = 5.0; + const float filterSum = 13.0; cv::Mat filter = cv::Mat(3, 3, CV_32F, cv::Scalar::all(1.0 / filterSum)); - filter.at(1, 1) = 5.0 / filterSum; + filter.at(1, 1) = filterMid / filterSum; /// applying filter on channels cv::Mat result = cv::Mat(); diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc index 237dc590dcc..43ca7a43a5c 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.cc @@ -57,7 +57,7 @@ Status SoftDvppDecodeRandomCropResizeJpegOp::Compute(const std::shared_ptr(input->GetBuffer()); + auto buffer = const_cast(input->GetBuffer()); CHECK_FAIL_RETURN_UNEXPECTED(buffer != nullptr, "SoftDvppDecodeRandomCropResizeJpeg: the input image buffer is empty."); SoftDpProcsessInfo info; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.h index 1c13433d08d..2672b32ec42 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_random_crop_resize_jpeg_op.h @@ -21,9 +21,9 @@ #include #include -#include "./utils/external_soft_dp.h" #include "minddata/dataset/core/tensor.h" #include "minddata/dataset/kernels/image/random_crop_and_resize_op.h" +#include "minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h" #include "minddata/dataset/util/status.h" namespace mindspore { diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.h index 21bb54c2225..9bc3381d6a2 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/soft_dvpp_decode_resize_jpeg_op.h @@ -32,7 +32,7 @@ class SoftDvppDecodeResizeJpegOp : public TensorOp { : target_height_(target_height), target_width_(target_width) {} /// \brief Destructor - ~SoftDvppDecodeResizeJpegOp() = default; + ~SoftDvppDecodeResizeJpegOp() override = default; Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; Status OutputShape(const 
std::vector &inputs, std::vector &outputs) override; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h index b703eb35cc6..d7336f0fc32 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h @@ -17,7 +17,7 @@ #ifndef EXTERNAL_SOFTDP_H #define EXTERNAL_SOFTDP_H -#include +#include struct SoftDpProcsessInfo { uint8_t *input_buffer; // input buffer diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.cc index 3f90b4cf028..793e4164d0d 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.cc @@ -44,11 +44,10 @@ uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo *soft_dp_process_info) { } // use vpc interface to resize and convert RGB, give user output buf and output size. - SoftDpCropInfo crop; - crop.left = 0; - crop.right = vpc_input_info.real_width - 1; - crop.up = 0; - crop.down = vpc_input_info.real_height - 1; + auto crop = SoftDpCropInfo{.left = 0, + .right = static_cast(vpc_input_info.real_width - 1), + .up = 0, + .down = static_cast(vpc_input_info.real_height - 1)}; VpcInfo output; output.addr = soft_dp_process_info->output_buffer; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.h index 5cfb87cf767..a706c129bf5 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp.h @@ -17,8 +17,8 @@ #ifndef SOFT_DP_H #define SOFT_DP_H -#include -#include "./external_soft_dp.h" +#include +#include "minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h" enum JpegdToVpcFormat { INPUT_VPC_UNKNOWN = -1, diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_log.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_log.h index b40d9f5e54d..95a023d0de6 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_log.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_log.h @@ -25,11 +25,10 @@ #define DP_EVENT 0x10000 #define DP_DEBUG_LEVEL (DP_EVENT | DP_ERR | DP_WARNING | DP_INFO | DP_DEBUG) -#include -#include - #if defined(DVPP_UTST) || defined(DEBUG) #include +#include +#include #define DP_LOG(model, level, format, ...) 
\ do { \ @@ -67,6 +66,8 @@ #include #include +#include +#include #include "glog/logging.h" template diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.cc index 7afd61868cb..dfae51e53e6 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.cc @@ -48,9 +48,5 @@ bool IsDirectory(const std::string &path) { return false; } - if (S_ISDIR(buf.st_mode)) { - return true; - } else { - return false; - } + return S_ISDIR(buf.st_mode); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.h index 549ad4a6ff8..14cc673a9fb 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_dp_tools.h @@ -40,11 +40,7 @@ T1 AlignDown(T1 num, T2 align) { template bool IsInTheScope(T num, T left_point, T right_point) { - if (num >= left_point && num <= right_point) { - return true; - } - - return false; + return num >= left_point && num <= right_point; } template diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.cc index 1a67a30e087..d40edbda7e7 100755 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.cc @@ -109,19 +109,19 @@ int32_t SoftVpc::CheckParamter() { uint32_t out_width = out_width_; uint32_t out_height = out_height_; - bool flag = (out_width * 32 >= crop_width) ? true : false; // A maximum of 32x zoom-out + bool flag = (out_width * 32 >= crop_width); // A maximum of 32x zoom-out VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail, "Max reduction multiple is 32. Please check left(%u), right(%u), out_width(%u).", left_, right_, out_width); // Up to 16x magnification - flag = (crop_width * 16 >= out_width) ? true : false; + flag = (crop_width * 16 >= out_width); VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail, "Max magnification is 16. Please check left(%u), right(%u), out_width(%u).", left_, right_, out_width); - flag = (out_height * 32 >= crop_height) ? true : false; // A maximum of 32x zoom-out + flag = (out_height * 32 >= crop_height); // A maximum of 32x zoom-out VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail, "Max reduction multiple is 32. Please check up(%u), down(%u), out_height(%u).", up_, down_, out_height); - flag = (crop_height * 16 >= out_height) ? true : false; // Up to 16x magnification + flag = (crop_height * 16 >= out_height); // Up to 16x magnification VPC_CHECK_COND_FAIL_PRINT_RETURN( flag, dpFail, "Max magnification is 16. Please check up(%u), down(%u), out_height(%u).", up_, down_, out_height); return dpSucc; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.h b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.h index 4622d7d16e3..ed93a2353f7 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/soft_vpc.h @@ -34,7 +34,7 @@ class SoftVpc { public: SoftVpc(); - ~SoftVpc() {} + ~SoftVpc() = default; /* * @brief : vpc Cropping and Scaling APIs. 
diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/yuv_scaler_para_set.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/yuv_scaler_para_set.cc index 1b9bf6399eb..df27cf8e65b 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/yuv_scaler_para_set.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/soft_dvpp/utils/yuv_scaler_para_set.cc @@ -75,7 +75,7 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW // taps_4, the second character in the square brackets is the start address of the array block. if ((*flag_ctl - initBracketNum) % arrTypeNum == 2) { - while (1) { + while (true) { ss >> yuv_scaler_paraset->scale[cnt].taps_4[index->first_index++]; if (ss.fail()) { // read failed. index->first_index = index->first_index - 1; @@ -94,7 +94,7 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW // taps_6 if ((*flag_ctl - initBracketNum) % arrTypeNum == 0) { - while (1) { + while (true) { ss >> yuv_scaler_paraset->scale[cnt].taps_6[index->second_index++]; if (ss.fail()) { // read failed. index->second_index = index->second_index - 1; @@ -115,7 +115,6 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW } int32_t CheckParamater(std::pair rlt, uint32_t i) { - int32_t ret = dpSucc; if (rlt.first == false) { API_LOGE("Get real path failed. index = %u", i); return dpFail; @@ -126,7 +125,7 @@ int32_t CheckParamater(std::pair rlt, uint32_t i) { return dpFail; } - return ret; + return dpSucc; } // Read the parameter set file and skip the comments in the file. @@ -177,7 +176,7 @@ int32_t ParseFileToVar(const std::string *para_set_name, uint32_t yuv_scaler_par } // count the number of "{", check the location of the data.
- if (str_line.find("{") != std::string::npos) { + if (str_line.find('{') != std::string::npos) { flag_ctl++; flag_tap = 1; } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.h index b69d91106de..ab36e53d359 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/solarize_op.h @@ -19,6 +19,7 @@ #include #include +#include #include #include "minddata/dataset/core/tensor.h" @@ -29,9 +30,9 @@ namespace mindspore { namespace dataset { class SolarizeOp : public TensorOp { public: - explicit SolarizeOp(std::vector threshold = {0, 255}) : threshold_(threshold) {} + explicit SolarizeOp(std::vector threshold = {0, 255}) : threshold_(std::move(threshold)) {} - ~SolarizeOp() = default; + ~SolarizeOp() override = default; Status Compute(const std::shared_ptr &input, std::shared_ptr *output) override; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/swap_red_blue_op.h b/mindspore/ccsrc/minddata/dataset/kernels/image/swap_red_blue_op.h index 696d00b33bb..48206e488c2 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/swap_red_blue_op.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/swap_red_blue_op.h @@ -30,7 +30,7 @@ namespace dataset { class SwapRedBlueOp : public TensorOp { public: /// \brief Constructor - SwapRedBlueOp() {} + SwapRedBlueOp() = default; SwapRedBlueOp(const SwapRedBlueOp &rhs) = default; diff --git a/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc index 95d75af0f2d..d27b6f9e3aa 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/image/uniform_aug_op.cc @@ -22,7 +22,7 @@ namespace dataset { const int UniformAugOp::kDefNumOps = 2; UniformAugOp::UniformAugOp(std::vector> op_list, int32_t num_ops) - : tensor_op_list_(op_list), num_ops_(num_ops) { + : tensor_op_list_(std::move(op_list)), num_ops_(num_ops) { rnd_.seed(GetSeed()); } diff --git a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc index ffb398c61ac..f12e758b1c4 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/ir/data/transforms_ir.cc @@ -70,7 +70,7 @@ Status ComposeOperation::ValidateParams() { std::shared_ptr ComposeOperation::Build() { std::vector> tensor_ops; (void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops), - [](std::shared_ptr op) -> std::shared_ptr { return op->Build(); }); + [](const auto &op) -> std::shared_ptr { return op->Build(); }); return std::make_shared(tensor_ops); } @@ -198,7 +198,7 @@ std::shared_ptr PadEndOperation::Build() { return std::make_shared tensor_op) : op_(tensor_op) { +PreBuiltOperation::PreBuiltOperation(std::shared_ptr tensor_op) : op_(std::move(tensor_op)) { #ifdef ENABLE_PYTHON auto pyfunc_tensor_op = std::dynamic_pointer_cast(tensor_op); if (pyfunc_tensor_op && pyfunc_tensor_op->IsRandom()) random_op_ = true; @@ -245,7 +245,7 @@ Status RandomChoiceOperation::ValidateParams() { std::shared_ptr RandomChoiceOperation::Build() { std::vector> tensor_ops; (void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops), - [](std::shared_ptr op) -> std::shared_ptr { return op->Build(); }); + [](const auto &op) -> std::shared_ptr { return 
op->Build(); }); return std::make_shared(tensor_ops); } diff --git a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc index 028111bfea2..68c4407ceb3 100644 --- a/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc +++ b/mindspore/ccsrc/minddata/dataset/text/ir/kernels/text_ir.cc @@ -15,7 +15,6 @@ */ #include - #include "minddata/dataset/text/ir/kernels/text_ir.h" #ifndef _WIN32 @@ -316,7 +315,9 @@ Status SentencePieceTokenizerOperation::ValidateParams() { RETURN_STATUS_SYNTAX_ERROR(err_msg); } } else { - Path vocab_file(vocab_path_); + std::string real_vocab_path; + RETURN_IF_NOT_OK(Path::RealPath(vocab_path_, real_vocab_path)); + Path vocab_file(real_vocab_path); if (!vocab_file.Exists() || vocab_file.IsDirectory()) { std::string err_msg = "SentencePieceTokenizer : vocab file: [" + vocab_path_ + "] is invalid or does not exist."; MS_LOG(ERROR) << err_msg; diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc index cee1de58447..1ae12990ae3 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/basic_tokenizer_op.cc @@ -54,10 +54,10 @@ BasicTokenizerOp::BasicTokenizerOp(const bool &lower_case, const bool &keep_whit : TokenizerOp(with_offsets), lower_case_(lower_case), keep_whitespace_(keep_whitespace), + normalization_form_(normalization_form), preserve_unused_token_(preserve_unused_token), case_fold_(std::make_unique()), nfd_normalize_(std::make_unique(NormalizeForm::kNfd)), - normalization_form_(normalization_form), common_normalize_(std::make_unique(normalization_form)), replace_accent_chars_(std::make_unique("\\p{Mn}", "")), replace_control_chars_(std::make_unique("\\p{Cc}|\\p{Cf}", " ")) { @@ -81,6 +81,7 @@ Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::string_view &text icu::ErrorCode error; const icu::Normalizer2 *nfkc_case_fold = icu::Normalizer2::getNFKCCasefoldInstance(error); CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "BasicTokenizer: getNFKCCasefoldInstance failed."); + RETURN_UNEXPECTED_IF_NULL(output); output->clear(); // 1. 
get start and end offsets of not case fold strs @@ -131,7 +132,7 @@ Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::shared_ptrtype() == DataType::DE_STRING, "BasicTokenizer: input is not string datatype."); std::vector strs(input->Size()); - int i = 0; + size_t i = 0; for (auto iter = input->begin(); iter != input->end(); iter++) { RETURN_IF_NOT_OK(CaseFoldWithoutUnusedWords(*iter, kUnusedWords, &strs[i++])); } diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc index a3b93336c3f..f9f7a2790f8 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/case_fold_op.cc @@ -31,7 +31,7 @@ Status CaseFoldOp::Compute(const std::shared_ptr &input, std::shared_ptr const icu::Normalizer2 *nfkc_case_fold = icu::Normalizer2::getNFKCCasefoldInstance(error); CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "CaseFold: getNFKCCasefoldInstance failed."); std::vector strs(input->Size()); - int i = 0; + size_t i = 0; for (auto iter = input->begin(); iter != input->end(); iter++) { icu::StringByteSink sink(&strs[i++]); nfkc_case_fold->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error); diff --git a/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc index d9b24eae454..b794b4c00f4 100644 --- a/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc +++ b/mindspore/ccsrc/minddata/dataset/text/kernels/ngram_op.cc @@ -44,7 +44,9 @@ Status NgramOp::Compute(const std::shared_ptr &input, std::shared_ptrshape().NumOfElements()); str_buffer.reserve(l_pad_with_sp_.size() * l_len_ + r_pad_with_sp_.size() * r_len_ + input->SizeInBytes()); offsets.push_back(str_buffer.size()); // insert 0 as the starting pos - for (int l_i = 0; l_i < l_len_; l_i++) offsets.push_back((str_buffer += l_pad_with_sp_).size()); + for (int l_i = 0; l_i < l_len_; l_i++) { + offsets.push_back((str_buffer += l_pad_with_sp_).size()); + } for (auto itr = input->begin(); itr != input->end(); ++itr) { str_buffer += (*itr); @@ -52,7 +54,9 @@ Status NgramOp::Compute(const std::shared_ptr &input, std::shared_ptr 0, "Ngram: ngrams needs to be a positive number.\n"); diff --git a/mindspore/ccsrc/minddata/dataset/util/allocator.h b/mindspore/ccsrc/minddata/dataset/util/allocator.h index 82cf9956fc2..6df5b1d6925 100644 --- a/mindspore/ccsrc/minddata/dataset/util/allocator.h +++ b/mindspore/ccsrc/minddata/dataset/util/allocator.h @@ -92,8 +92,9 @@ template , typename... Args> Status MakeUnique(std::unique_ptr> *out, C alloc, size_t n, Args &&... args) { RETURN_UNEXPECTED_IF_NULL(out); CHECK_FAIL_RETURN_UNEXPECTED(n > 0, "size must be positive"); + T *data = nullptr; try { - T *data = alloc.allocate(n); + data = alloc.allocate(n); // Some of our implementation of allocator (e.g. NumaAllocator) don't throw std::bad_alloc. 
// So we have to catch for null ptr if (data == nullptr) { @@ -114,8 +115,14 @@ Status MakeUnique(std::unique_ptr> *out, C alloc, }; *out = std::unique_ptr>(data, std::bind(deleter, std::placeholders::_1, alloc, n)); } catch (const std::bad_alloc &e) { + if (data != nullptr) { + alloc.deallocate(data, n); + } return Status(StatusCode::kMDOutOfMemory); } catch (const std::exception &e) { + if (data != nullptr) { + alloc.deallocate(data, n); + } RETURN_STATUS_UNEXPECTED(e.what()); } return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/util/arena.cc b/mindspore/ccsrc/minddata/dataset/util/arena.cc index b64b2874f03..3540406a87a 100644 --- a/mindspore/ccsrc/minddata/dataset/util/arena.cc +++ b/mindspore/ccsrc/minddata/dataset/util/arena.cc @@ -42,6 +42,7 @@ ArenaImpl::ArenaImpl(void *ptr, size_t sz) : size_in_bytes_(sz), ptr_(ptr) { } Status ArenaImpl::Allocate(size_t n, void **p) { + RETURN_UNEXPECTED_IF_NULL(p); if (n == 0) { *p = nullptr; return Status::OK(); @@ -83,6 +84,10 @@ std::pair, bool> ArenaImpl::FindPrevBlk(uint64_t a } void ArenaImpl::Deallocate(void *p) { + if (p == nullptr) { + MS_LOG(ERROR) << "The pointer[p] is null."; + return; + } auto *q = get_base_addr(p); MemHdr hdr(0, 0); MemHdr::getHdr(q, &hdr); @@ -147,8 +152,8 @@ bool ArenaImpl::BlockEnlarge(uint64_t *addr, uint64_t old_sz, uint64_t new_sz) { } Status ArenaImpl::FreeAndAlloc(void **pp, size_t old_sz, size_t new_sz) { - MS_ASSERT(pp); - MS_ASSERT(*pp); + RETURN_UNEXPECTED_IF_NULL(pp); + RETURN_UNEXPECTED_IF_NULL(*pp); void *p = nullptr; void *q = *pp; RETURN_IF_NOT_OK(Allocate(new_sz, &p)); @@ -163,8 +168,8 @@ Status ArenaImpl::FreeAndAlloc(void **pp, size_t old_sz, size_t new_sz) { } Status ArenaImpl::Reallocate(void **pp, size_t old_sz, size_t new_sz) { - MS_ASSERT(pp); - MS_ASSERT(*pp); + RETURN_UNEXPECTED_IF_NULL(pp); + RETURN_UNEXPECTED_IF_NULL(*pp); uint64_t actual_size = static_cast(new_sz) + ARENA_WALL_OVERHEAD_SZ; if (actual_size > this->get_max_size()) { RETURN_STATUS_UNEXPECTED("Request size too big : " + std::to_string(new_sz)); @@ -212,6 +217,10 @@ int ArenaImpl::PercentFree() const { for (auto &it : tr_) { sz += it.priority; } + if (size_in_bytes_ == 0) { + MS_LOG(ERROR) << "size_in_bytes_ can not be zero."; + return 0; + } double ratio = static_cast(sz * ARENA_BLK_SZ) / static_cast(size_in_bytes_); return static_cast(ratio * 100.0); } diff --git a/mindspore/ccsrc/minddata/dataset/util/buddy.cc b/mindspore/ccsrc/minddata/dataset/util/buddy.cc index 2c9c0305d34..bb11771ad0a 100644 --- a/mindspore/ccsrc/minddata/dataset/util/buddy.cc +++ b/mindspore/ccsrc/minddata/dataset/util/buddy.cc @@ -65,6 +65,8 @@ Status BuddySpace::Init() { } Status BuddySpace::Alloc(const uint64_t sz, BSpaceDescriptor *desc, addr_t *p) noexcept { + RETURN_UNEXPECTED_IF_NULL(desc); + RETURN_UNEXPECTED_IF_NULL(p); std::lock_guard lock(mutex_); addr_t addr = AllocNoLock(sz, desc); if (addr != NOSPACE) { @@ -100,6 +102,10 @@ void BuddySpace::FreeNoLock(const BSpaceDescriptor *desc) { } void BuddySpace::Free(const BSpaceDescriptor *desc) { + if (desc == nullptr) { + MS_LOG(ERROR) << "The pointer[desc] is null."; + return; + } std::lock_guard lock(mutex_); return FreeNoLock(desc); } @@ -135,6 +141,18 @@ std::ostream &operator<<(std::ostream &os, const BuddySpace &s) { return os; } +uint32_t BuddySpace::SizeToBlock(const uint64_t sz) const { + if (min_ == 0) { + MS_LOG(ERROR) << "min_ can not be zero."; + return 0; + } + uint32_t reqSize = (sz / min_); + if (sz % min_) { + reqSize++; + } + return reqSize; +} + void 
BuddySpace::GetBuddySegState(const rel_addr_t rel_addr, size_t *rel_sz, STATE *st) const { const int32_t kAddrOffset = 4; const int32_t kShiftOffset = 2; diff --git a/mindspore/ccsrc/minddata/dataset/util/buddy.h b/mindspore/ccsrc/minddata/dataset/util/buddy.h index 97834c1c436..1264001431a 100644 --- a/mindspore/ccsrc/minddata/dataset/util/buddy.h +++ b/mindspore/ccsrc/minddata/dataset/util/buddy.h @@ -105,13 +105,7 @@ class BuddySpace { void FreeNoLock(const BSpaceDescriptor *desc); - uint32_t SizeToBlock(const uint64_t sz) const { - uint32_t reqSize = (sz / min_); - if (sz % min_) { - reqSize++; - } - return reqSize; - } + uint32_t SizeToBlock(const uint64_t sz) const; void GetBuddySegState(const rel_addr_t rel_addr, size_t *rel_sz, STATE *st) const; diff --git a/mindspore/ccsrc/minddata/dataset/util/json_helper.cc b/mindspore/ccsrc/minddata/dataset/util/json_helper.cc index ea721d42035..56e5e460bbc 100644 --- a/mindspore/ccsrc/minddata/dataset/util/json_helper.cc +++ b/mindspore/ccsrc/minddata/dataset/util/json_helper.cc @@ -31,6 +31,7 @@ namespace dataset { Status JsonHelper::CreateAlbum(const std::string &in_dir, const std::string &out_dir) { // in check Path base_dir = Path(in_dir); + RETURN_IF_NOT_OK(RealPath(in_dir)); if (!base_dir.IsDirectory() || !base_dir.Exists()) { RETURN_STATUS_UNEXPECTED("Input dir is not a directory or doesn't exist"); } @@ -41,8 +42,8 @@ Status JsonHelper::CreateAlbum(const std::string &in_dir, const std::string &out // iterate over in dir and create json for all images uint64_t index = 0; auto dir_it = Path::DirIterator::OpenDirectory(&base_dir); - while (dir_it->hasNext()) { - Path v = dir_it->next(); + while (dir_it->HasNext()) { + Path v = dir_it->Next(); // check if found file fits image extension // create json file in output dir with the path @@ -53,6 +54,12 @@ Status JsonHelper::CreateAlbum(const std::string &in_dir, const std::string &out return Status::OK(); } +Status JsonHelper::RealPath(const std::string &path) { + std::string real_path; + RETURN_IF_NOT_OK(Path::RealPath(path, real_path)); + return Status::OK(); +} + // A print method typically used for debugging void JsonHelper::Print(std::ostream &out) const { out << " Data Helper" @@ -65,10 +72,16 @@ Status JsonHelper::UpdateArray(const std::string &in_file, const std::string &ke Path in = Path(in_file); nlohmann::json js; if (in.Exists()) { - std::ifstream in_stream(in_file); - MS_LOG(INFO) << "Filename: " << in_file << "."; - in_stream >> js; - in_stream.close(); + RETURN_IF_NOT_OK(RealPath(in_file)); + try { + std::ifstream in_stream(in_file); + MS_LOG(INFO) << "Filename: " << in_file << "."; + in_stream >> js; + in_stream.close(); + } catch (const std::exception &err) { + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + in_file + + ", please delete it and try again!"); + } } js[key] = value; MS_LOG(INFO) << "Write outfile is: " << js << "."; @@ -94,12 +107,18 @@ Status JsonHelper::RemoveKey(const std::string &in_file, const std::string &key, Path in = Path(in_file); nlohmann::json js; if (in.Exists()) { - std::ifstream in_stream(in_file); - MS_LOG(INFO) << "Filename: " << in_file << "."; - in_stream >> js; - in_stream.close(); + RETURN_IF_NOT_OK(RealPath(in_file)); + try { + std::ifstream in_stream(in_file); + MS_LOG(INFO) << "Filename: " << in_file << "."; + in_stream >> js; + in_stream.close(); + } catch (const std::exception &err) { + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + in_file + + ", please delete it and try again!"); + } } - 
js.erase(key); + (void)js.erase(key); MS_LOG(INFO) << "Write outfile is: " << js << "."; if (out_file == "") { std::ofstream o(in_file, std::ofstream::trunc); diff --git a/mindspore/ccsrc/minddata/dataset/util/json_helper.h b/mindspore/ccsrc/minddata/dataset/util/json_helper.h index 26541438794..cfa729a3a5c 100644 --- a/mindspore/ccsrc/minddata/dataset/util/json_helper.h +++ b/mindspore/ccsrc/minddata/dataset/util/json_helper.h @@ -70,13 +70,20 @@ class JsonHelper { Path in = Path(in_file); nlohmann::json js; if (in.Exists()) { - std::ifstream in(in_file); - MS_LOG(INFO) << "Filename: " << in_file << "."; - in >> js; - in.close(); + RETURN_IF_NOT_OK(RealPath(in_file)); + try { + std::ifstream in_stream(in_file); + MS_LOG(INFO) << "Filename: " << in_file << "."; + in_stream >> js; + in_stream.close(); + } catch (const std::exception &err) { + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + in_file + + ", please delete it and try again!"); + } } js[key] = value; MS_LOG(INFO) << "Write outfile is: " << js << "."; + if (out_file == "") { std::ofstream o(in_file, std::ofstream::trunc); o << js; @@ -107,10 +114,16 @@ class JsonHelper { Path in = Path(in_file); nlohmann::json js; if (in.Exists()) { - std::ifstream in(in_file); - MS_LOG(INFO) << "Filename: " << in_file << "."; - in >> js; - in.close(); + RETURN_IF_NOT_OK(RealPath(in_file)); + try { + std::ifstream in_stream(in_file); + MS_LOG(INFO) << "Filename: " << in_file << "."; + in_stream >> js; + in_stream.close(); + } catch (const std::exception &err) { + RETURN_STATUS_UNEXPECTED("Invalid file, failed to open json file: " + in_file + + ", please delete it and try again!"); + } } js[key] = value; MS_LOG(INFO) << "Write outfile is: " << js << "."; @@ -161,7 +174,9 @@ class JsonHelper { template Status WriteBinFile(const std::string &in_file, T *data, size_t length) { try { - std::ofstream o(in_file, std::ios::binary | std::ios::out); + std::string real_in_file; + RETURN_IF_NOT_OK(Path::RealPath(in_file, real_in_file)); + std::ofstream o(real_in_file, std::ios::binary | std::ios::out); if (!o.is_open()) { RETURN_STATUS_UNEXPECTED("Error opening Bin file to write"); } @@ -185,7 +200,7 @@ class JsonHelper { size_t DumpData(const unsigned char *tensor_addr, const size_t &tensor_size, void *addr, const size_t &buffer_size); /// \brief Helper function to delete key in json file - /// note This function will return okay even if key not found + /// \note This function will return okay even if key not found /// \param[in] in_file Json file to remove key from /// \param[in] key The key to remove /// \return Status The status code returned @@ -195,10 +210,16 @@ class JsonHelper { /// \param out - The output stream to write output to void Print(std::ostream &out) const; + /// \brief Helper function to check real path + /// \note This function will return okay even if key not found + /// \param[in] path Path to Json file + /// \return Status The status code returned + Status RealPath(const std::string &path); + /// \brief << Stream output operator overload - /// \notes This allows you to write the debug print info using stream operators + /// \note This allows you to write the debug print info using stream operators /// \param out Reference to the output stream being overloaded - /// \param ds Reference to the DataSchema to display + /// \param dh Reference to the DataSchema to display /// \return The output stream must be returned friend std::ostream &operator<<(std::ostream &out, const JsonHelper &dh) { dh.Print(out); diff --git 
a/mindspore/ccsrc/minddata/dataset/util/numa_interface.cc b/mindspore/ccsrc/minddata/dataset/util/numa_interface.cc index a61bcf75498..47560b71173 100644 --- a/mindspore/ccsrc/minddata/dataset/util/numa_interface.cc +++ b/mindspore/ccsrc/minddata/dataset/util/numa_interface.cc @@ -27,6 +27,14 @@ inline void *LoadLibrary(const char *name) { } inline void *GetNumaAdapterFunc(void *handle, const char *name) { + if (handle == nullptr) { + MS_LOG(ERROR) << "The pointer[handle] is null."; + return nullptr; + } + if (name == nullptr) { + MS_LOG(ERROR) << "The pointer[name] is null."; + return nullptr; + } void *func = dlsym(handle, name); return func; } diff --git a/mindspore/ccsrc/minddata/dataset/util/numa_interface.h b/mindspore/ccsrc/minddata/dataset/util/numa_interface.h index daa3c0f0583..19dad6d3a91 100644 --- a/mindspore/ccsrc/minddata/dataset/util/numa_interface.h +++ b/mindspore/ccsrc/minddata/dataset/util/numa_interface.h @@ -16,6 +16,7 @@ #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_NUMA_INTERFACE_H_ #define MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_NUMA_INTERFACE_H_ +#include "minddata/dataset/util/log_adapter.h" #include "minddata/dataset/util/status.h" namespace mindspore { diff --git a/mindspore/ccsrc/minddata/dataset/util/path.cc b/mindspore/ccsrc/minddata/dataset/util/path.cc index a2764f2a33f..e81680533be 100644 --- a/mindspore/ccsrc/minddata/dataset/util/path.cc +++ b/mindspore/ccsrc/minddata/dataset/util/path.cc @@ -20,7 +20,6 @@ #include #include #include -#include #include "./securec.h" #include "utils/ms_utils.h" @@ -324,7 +323,7 @@ Path::DirIterator::DirIterator(Path *f) : dir_(f), dp_(nullptr), entry_(nullptr) dp_ = opendir(f->toString().c_str()); } -bool Path::DirIterator::hasNext() { +bool Path::DirIterator::HasNext() { do { entry_ = readdir(dp_); if (entry_) { @@ -337,7 +336,25 @@ bool Path::DirIterator::hasNext() { return (entry_ != nullptr); } -Path Path::DirIterator::next() { return (*(this->dir_) / Path(entry_->d_name)); } +Path Path::DirIterator::Next() { return (*(this->dir_) / Path(entry_->d_name)); } + +Status Path::RealPath(const std::string &path, std::string &realpath_str) { + char real_path[PATH_MAX] = {0}; + // input_path is only file_name +#if defined(_WIN32) || defined(_WIN64) + CHECK_FAIL_RETURN_UNEXPECTED(path.length() < PATH_MAX, + "The length of path: " + path + " exceeds limit: " + std::to_string(PATH_MAX)); + auto ret = _fullpath(real_path, common::SafeCStr(path), PATH_MAX); + CHECK_FAIL_RETURN_UNEXPECTED(ret != nullptr, "The file " + path + " does not exist."); +#else + CHECK_FAIL_RETURN_UNEXPECTED(path.length() < NAME_MAX, + "The length of path: " + path + " exceeds limit: " + std::to_string(NAME_MAX)); + auto ret = realpath(common::SafeCStr(path), real_path); + CHECK_FAIL_RETURN_UNEXPECTED(ret != nullptr, "The file " + path + " does not exist."); +#endif + realpath_str = std::string(real_path); + return Status::OK(); +} std::ostream &operator<<(std::ostream &os, const Path &s) { os << s.path_; diff --git a/mindspore/ccsrc/minddata/dataset/util/path.h b/mindspore/ccsrc/minddata/dataset/util/path.h index cb131ad5ae0..ea340b07916 100644 --- a/mindspore/ccsrc/minddata/dataset/util/path.h +++ b/mindspore/ccsrc/minddata/dataset/util/path.h @@ -32,9 +32,9 @@ class Path { ~DirIterator(); - bool hasNext(); + bool HasNext(); - Path next(); + Path Next(); private: explicit DirIterator(Path *f); @@ -116,6 +116,8 @@ class Path { std::string Basename(); + static Status RealPath(const std::string &path, std::string &realpath_str); // NOLINT + friend std::ostream 
&operator<<(std::ostream &os, const Path &s); private: diff --git a/mindspore/ccsrc/minddata/dataset/util/slice.h b/mindspore/ccsrc/minddata/dataset/util/slice.h index ca76b546a0f..0c3f07f9295 100644 --- a/mindspore/ccsrc/minddata/dataset/util/slice.h +++ b/mindspore/ccsrc/minddata/dataset/util/slice.h @@ -105,7 +105,7 @@ class WritableSlice : public ReadableSlice { WritableSlice &operator=(const WritableSlice &lhs) { if (this != &lhs) { mutable_data_ = lhs.mutable_data_; - ReadableSlice::operator=(lhs); + (void)ReadableSlice::operator=(lhs); } return *this; } @@ -119,7 +119,7 @@ class WritableSlice : public ReadableSlice { if (this != &lhs) { mutable_data_ = lhs.mutable_data_; lhs.mutable_data_ = nullptr; - ReadableSlice::operator=(std::move(lhs)); + (void)ReadableSlice::operator=(std::move(lhs)); } return *this; } diff --git a/mindspore/ccsrc/minddata/dataset/util/system_pool.h b/mindspore/ccsrc/minddata/dataset/util/system_pool.h index 789252dc8c8..4e43ef235af 100644 --- a/mindspore/ccsrc/minddata/dataset/util/system_pool.h +++ b/mindspore/ccsrc/minddata/dataset/util/system_pool.h @@ -39,9 +39,14 @@ class SystemPool : public MemoryPool { Status Allocate(size_t n, void **pp) override { return DeMalloc(n, pp, false); } - void Deallocate(void *p) override { free(p); } + void Deallocate(void *p) override { + if (p != nullptr) { + free(p); + } + } Status Reallocate(void **p, size_t old_sz, size_t new_sz) override { + RETURN_UNEXPECTED_IF_NULL(p); if (old_sz >= new_sz) { // Do nothing if we shrink. return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/util/task_manager.cc b/mindspore/ccsrc/minddata/dataset/util/task_manager.cc index 3e7303fbb26..635113cb558 100644 --- a/mindspore/ccsrc/minddata/dataset/util/task_manager.cc +++ b/mindspore/ccsrc/minddata/dataset/util/task_manager.cc @@ -53,7 +53,7 @@ Status TaskManager::CreateAsyncTask(const std::string &my_name, const std::funct // Track all the TaskGroup. 
Used for control-c { LockGuard lck(&tg_lock_); - this->grp_list_.insert(vg); + (void)this->grp_list_.insert(vg); } RETURN_IF_NOT_OK((*task)->wp_.Register(vg)); RETURN_IF_NOT_OK((*task)->Run()); @@ -170,7 +170,7 @@ Status TaskManager::DoServiceStart() { watchdog_grp_ = nullptr; return rc; } - grp_list_.erase(watchdog_grp_); + (void)grp_list_.erase(watchdog_grp_); lru_.Remove(watchdog_); #endif return Status::OK(); diff --git a/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h index fd3aa9d2d87..e2bff12c469 100644 --- a/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_header.h @@ -133,7 +133,7 @@ class __attribute__((visibility("default"))) ShardHeader { MSRStatus FileToPages(const std::string dump_file_name); - static MSRStatus initialize(const std::shared_ptr *header_ptr, const json &schema, + static MSRStatus Initialize(const std::shared_ptr *header_ptr, const json &schema, const std::vector &index_fields, std::vector &blob_fields, uint64_t &schema_id); diff --git a/mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h index 8b5d58c74d2..474d6bb6d41 100644 --- a/mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_index_generator.h @@ -57,7 +57,7 @@ class __attribute__((visibility("default"))) ShardIndexGenerator { /// \brief create databases for indexes MSRStatus WriteToDatabase(); - static MSRStatus finalize(const std::vector file_names); + static MSRStatus Finalize(const std::vector file_names); private: static int Callback(void *not_used, int argc, char **argv, char **az_col_name); diff --git a/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h b/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h index afff0ecae7a..d014536ff3b 100644 --- a/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h +++ b/mindspore/ccsrc/minddata/mindrecord/include/shard_writer.h @@ -112,7 +112,7 @@ class __attribute__((visibility("default"))) ShardWriter { const std::map>> &row_bin_data, std::shared_ptr> *output); - static MSRStatus initialize(const std::unique_ptr *writer_ptr, + static MSRStatus Initialize(const std::unique_ptr *writer_ptr, const std::vector &file_names); private: diff --git a/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc index 59a68116912..4c6681e1516 100644 --- a/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_index_generator.cc @@ -499,7 +499,6 @@ ROW_DATA ShardIndexGenerator::GenerateRowData(int shard_no, const std::mapGetPageID()) + header_size_ + cur_raw_page_offset, std::ios::beg); if (!io_seekg.good() || io_seekg.fail() || io_seekg.bad()) { MS_LOG(ERROR) << "File seekg failed"; - in.close(); return {FAILED, {}}; } @@ -511,7 +510,6 @@ ROW_DATA ShardIndexGenerator::GenerateRowData(int shard_no, const std::map(&schema_size), kInt64Len); if (!io_read.good() || io_read.fail() || io_read.bad()) { MS_LOG(ERROR) << "File read failed"; - in.close(); return {FAILED, {}}; } @@ -598,15 +596,21 @@ MSRStatus ShardIndexGenerator::ExecuteTransaction(const int &shard_no, std::pair auto sql = GenerateRawSQL(fields_); if (sql.first != SUCCESS) { MS_LOG(ERROR) << "Generate raw SQL failed"; + in.close(); + sqlite3_close(db.second); return FAILED; } 
auto data = GenerateRowData(shard_no, blob_id_to_page_id, raw_page_id, in); if (data.first != SUCCESS) { MS_LOG(ERROR) << "Generate raw data failed"; + in.close(); + sqlite3_close(db.second); return FAILED; } if (BindParameterExecuteSQL(db.second, sql.second, data.second) == FAILED) { MS_LOG(ERROR) << "Execute SQL failed"; + in.close(); + sqlite3_close(db.second); return FAILED; } MS_LOG(INFO) << "Insert " << data.second.size() << " rows to index db."; @@ -690,7 +694,7 @@ void ShardIndexGenerator::DatabaseWriter() { shard_no = task_++; } } -MSRStatus ShardIndexGenerator::finalize(const std::vector file_names) { +MSRStatus ShardIndexGenerator::Finalize(const std::vector file_names) { if (file_names.empty()) { MS_LOG(ERROR) << "Mindrecord files is empty."; return FAILED; diff --git a/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc index f182d503b1e..ec5bd0436df 100644 --- a/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_reader.cc @@ -101,6 +101,7 @@ MSRStatus ShardReader::Init(const std::vector &file_paths, bool loa sqlite3 *db = nullptr; auto ret3 = VerifyDataset(&db, file); if (ret3 != SUCCESS) { + sqlite3_close(db); return FAILED; } @@ -154,6 +155,7 @@ MSRStatus ShardReader::VerifyDataset(sqlite3 **db, const string &file) { auto rc = sqlite3_open_v2(common::SafeCStr(file + ".db"), db, SQLITE_OPEN_READONLY, nullptr); if (rc != SQLITE_OK) { MS_LOG(ERROR) << "Invalid file, failed to open database: " << file + ".db, error: " << sqlite3_errmsg(*db); + sqlite3_close(*db); return FAILED; } MS_LOG(DEBUG) << "Opened database successfully"; @@ -177,6 +179,7 @@ MSRStatus ShardReader::VerifyDataset(sqlite3 **db, const string &file) { return FAILED; } } + sqlite3_free(errmsg); return SUCCESS; } @@ -400,16 +403,19 @@ MSRStatus ShardReader::ConvertLabelToJson(const std::vectorclose(); return FAILED; } catch (std::invalid_argument &e) { MS_LOG(ERROR) << "Invalid argument: " << e.what(); + fs->close(); return FAILED; } catch (...) 
{ MS_LOG(ERROR) << "Exception was caught while convert label to json."; + fs->close(); return FAILED; } } - + fs->close(); return SUCCESS; } // namespace mindrecord @@ -499,6 +505,7 @@ void ShardReader::GetClassesInShard(sqlite3 *db, int shard_id, const std::string for (int i = 0; i < static_cast(columns.size()); ++i) { category_ptr->emplace(columns[i][0]); } + sqlite3_free(errmsg); } ROW_GROUPS ShardReader::ReadAllRowGroup(const std::vector &columns) { @@ -931,8 +938,8 @@ int64_t ShardReader::GetNumClasses(const std::string &category_field) { std::string sql = "SELECT DISTINCT " + ret.second + " FROM INDEXES"; std::vector threads = std::vector(shard_count); auto category_ptr = std::make_shared>(); + sqlite3 *db = nullptr; for (int x = 0; x < shard_count; x++) { - sqlite3 *db = nullptr; int rc = sqlite3_open_v2(common::SafeCStr(file_paths_[x] + ".db"), &db, SQLITE_OPEN_READONLY, nullptr); if (SQLITE_OK != rc) { MS_LOG(ERROR) << "Invalid file, failed to open database: " << file_paths_[x] + ".db, error: " @@ -941,10 +948,10 @@ int64_t ShardReader::GetNumClasses(const std::string &category_field) { } threads[x] = std::thread(&ShardReader::GetClassesInShard, this, db, x, sql, category_ptr); } - for (int x = 0; x < shard_count; x++) { threads[x].join(); } + sqlite3_close(db); return category_ptr->size(); } diff --git a/mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc b/mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc index c23e2656084..e80d16c2124 100644 --- a/mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc +++ b/mindspore/ccsrc/minddata/mindrecord/io/shard_writer.cc @@ -569,6 +569,7 @@ int ShardWriter::LockWriter(bool parallel_writer) { auto realpath = Common::GetRealPath(file); if (!realpath.has_value()) { MS_LOG(ERROR) << "Get real path failed, path=" << file; + close(fd); return -1; } @@ -576,6 +577,7 @@ int ShardWriter::LockWriter(bool parallel_writer) { fs->open(realpath.value(), std::ios::in | std::ios::out | std::ios::binary); if (fs->fail()) { MS_LOG(ERROR) << "Invalid file, failed to open file: " << file; + close(fd); return -1; } file_streams_.push_back(fs); @@ -583,6 +585,7 @@ int ShardWriter::LockWriter(bool parallel_writer) { if (shard_header_->FileToPages(pages_file_) == FAILED) { MS_LOG(ERROR) << "Invalid data, failed to read pages from file."; + close(fd); return -1; } return fd; @@ -1212,6 +1215,7 @@ MSRStatus ShardWriter::WriteShardHeader() { uint64_t line_len = bin_header.size(); if (line_len + kInt64Len > header_size_) { MS_LOG(ERROR) << "Shard header is too big"; + file_streams_[shard_id]->close(); return FAILED; } @@ -1304,7 +1308,7 @@ void ShardWriter::SetLastBlobPage(const int &shard_id, std::shared_ptr &la } } -MSRStatus ShardWriter::initialize(const std::unique_ptr *writer_ptr, +MSRStatus ShardWriter::Initialize(const std::unique_ptr *writer_ptr, const std::vector &file_names) { if (writer_ptr == nullptr) { MS_LOG(ERROR) << "ShardWriter pointer is NULL."; diff --git a/mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc b/mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc index 040aa115e3e..737b6e93c2b 100644 --- a/mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc +++ b/mindspore/ccsrc/minddata/mindrecord/meta/shard_header.cc @@ -372,9 +372,10 @@ std::vector ShardHeader::SerializeHeader() { std::string ShardHeader::SerializeIndexFields() { json j; auto fields = index_->GetFields(); - for (const auto &field : fields) { - j.push_back({{"schema_id", field.first}, {"index_field", field.second}}); - } + 
(void)std::transform(fields.begin(), fields.end(), std::back_inserter(j), + [](const std::pair &field) -> json { + return {{"schema_id", field.first}, {"index_field", field.second}}; + }); return j.dump(); } @@ -382,9 +383,8 @@ std::vector ShardHeader::SerializePage() { std::vector pages; for (auto &shard_pages : pages_) { json j; - for (const auto &p : shard_pages) { - j.emplace_back(p->GetPage()); - } + (void)std::transform(shard_pages.begin(), shard_pages.end(), std::back_inserter(j), + [](const std::shared_ptr &p) { return p->GetPage(); }); pages.emplace_back(j.dump()); } return pages; @@ -392,25 +392,22 @@ std::vector ShardHeader::SerializePage() { std::string ShardHeader::SerializeStatistics() { json j; - for (const auto &stats : statistics_) { - j.emplace_back(stats->GetStatistics()); - } + (void)std::transform(statistics_.begin(), statistics_.end(), std::back_inserter(j), + [](const std::shared_ptr &stats) { return stats->GetStatistics(); }); return j.dump(); } std::string ShardHeader::SerializeSchema() { json j; - for (const auto &schema : schema_) { - j.emplace_back(schema->GetSchema()); - } + (void)std::transform(schema_.begin(), schema_.end(), std::back_inserter(j), + [](const std::shared_ptr &schema) { return schema->GetSchema(); }); return j.dump(); } std::string ShardHeader::SerializeShardAddress() { json j; - for (const auto &addr : shard_addresses_) { - j.emplace_back(GetFileName(addr).second); - } + (void)std::transform(shard_addresses_.begin(), shard_addresses_.end(), std::back_inserter(j), + [](const std::string &addr) { return GetFileName(addr).second; }); return j.dump(); } @@ -759,7 +756,7 @@ MSRStatus ShardHeader::FileToPages(const std::string dump_file_name) { return SUCCESS; } -MSRStatus ShardHeader::initialize(const std::shared_ptr *header_ptr, const json &schema, +MSRStatus ShardHeader::Initialize(const std::shared_ptr *header_ptr, const json &schema, const std::vector &index_fields, std::vector &blob_fields, uint64_t &schema_id) { if (header_ptr == nullptr) { @@ -775,9 +772,8 @@ MSRStatus ShardHeader::initialize(const std::shared_ptr *header_ptr // create index std::vector> id_index_fields; if (!index_fields.empty()) { - for (auto &el : index_fields) { - id_index_fields.emplace_back(schema_id, el); - } + (void)std::transform(index_fields.begin(), index_fields.end(), std::back_inserter(id_index_fields), + [schema_id](const std::string &el) { return std::make_pair(schema_id, el); }); if (SUCCESS != (*header_ptr)->AddIndexFields(id_index_fields)) { MS_LOG(ERROR) << "Got unexpected error when adding mindrecord index."; return FAILED; diff --git a/mindspore/ccsrc/utils/tensorprint_utils.cc b/mindspore/ccsrc/utils/tensorprint_utils.cc index f642d0301c0..e64aa3388a8 100644 --- a/mindspore/ccsrc/utils/tensorprint_utils.cc +++ b/mindspore/ccsrc/utils/tensorprint_utils.cc @@ -279,6 +279,7 @@ void TensorPrint::operator()() { acltdtDataset *acl_dataset = acltdtCreateDataset(); if (acl_dataset == nullptr) { MS_LOG(ERROR) << "Failed to create acl dateaset."; + break; } if (acltdtReceiveTensor(acl_handle_, acl_dataset, -1 /* no timeout */) != ACL_SUCCESS) { MS_LOG(ERROR) << "AclHandle failed to receive tensor."; @@ -295,6 +296,7 @@ void TensorPrint::operator()() { acltdtDataset *acl_dataset = acltdtCreateDataset(); if (acl_dataset == nullptr) { MS_LOG(ERROR) << "Failed to create acl dateaset."; + break; } if (acltdtReceiveTensor(acl_handle_, acl_dataset, -1 /* no timeout */) != ACL_SUCCESS) { MS_LOG(ERROR) << "Acltdt failed to receive tensor."; diff --git 
a/mindspore/lite/minddata/wrapper/album_op_android.cc b/mindspore/lite/minddata/wrapper/album_op_android.cc index 472ce0a1305..103316aa555 100644 --- a/mindspore/lite/minddata/wrapper/album_op_android.cc +++ b/mindspore/lite/minddata/wrapper/album_op_android.cc @@ -277,7 +277,7 @@ Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { // consider templating this function to handle all ints - if (data_schema_->column(col_num).type() == DataType::DE_INT64) { + if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -286,7 +286,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); - } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { + } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -297,14 +297,14 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); } else { RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither int32 nor int64, it is " + - data_schema_->column(col_num).type().ToString()); + data_schema_->Column(col_num).Type().ToString()); } return Status::OK(); } Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { // consider templating this function to handle all ints - if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { + if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -313,7 +313,7 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); }); RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); - } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { + } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) { std::vector data; // Iterate over the integer list and add those values to the output shape tensor @@ -324,13 +324,13 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, tensor)); } else { RETURN_STATUS_UNEXPECTED("Invalid data, column type is neither float32 nor float64, it is " + - data_schema_->column(col_num).type().ToString()); + data_schema_->Column(col_num).Type().ToString()); } return Status::OK(); } Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorPtr *tensor) { - if (data_schema_->column(col_num).type() == DataType::DE_STRING) { + if (data_schema_->Column(col_num).Type() == DataType::DE_STRING) { RETURN_IF_NOT_OK(Tensor::CreateScalar(file, tensor)); return Status::OK(); } @@ -343,7 +343,7 @@ Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorPtr Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorPtr *tensor) { // hack to get the file name without extension, the 1 is to get rid of the backslash 
character - RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), tensor)); + RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->Column(col_num).Type(), tensor)); return Status::OK(); } @@ -352,11 +352,11 @@ Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorPtr *tensor) { // Float64 doesn't work with reinterpret cast here. Otherwise we limit the float in the schema to // only be float32, seems like a weird limitation to impose Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { - if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) { + if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) { double data = json_obj; MS_LOG(INFO) << "double found: " << json_obj << "."; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor)); - } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) { + } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) { float data = json_obj; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor)); MS_LOG(INFO) << "float found: " << json_obj << "."; @@ -366,11 +366,11 @@ Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, // Loads a tensor with int value, we have to cast the value to type specified in the schema. Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorPtr *tensor) { - if (data_schema_->column(col_num).type() == DataType::DE_INT64) { + if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) { int64_t data = json_obj; MS_LOG(INFO) << "int64 found: " << json_obj << "."; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor)); - } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) { + } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) { int32_t data = json_obj; RETURN_IF_NOT_OK(Tensor::CreateScalar(data, tensor)); MS_LOG(INFO) << "int32 found: " << json_obj << "."; @@ -383,17 +383,17 @@ Status AlbumOp::LoadIntTensorRowByIndex(int index, bool is_array, const nlohmann int i = index; // int value if (!is_array && - (data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) { + (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) { TensorPtr tensor; RETURN_IF_NOT_OK(LoadIntTensor(column_value, i, &tensor)); - (*map_row)[data_schema_->column(i).name()] = tensor; + (*map_row)[data_schema_->Column(i).Name()] = tensor; } // int array if (is_array && - (data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) { + (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) { TensorPtr tensor; RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, &tensor)); - (*map_row)[data_schema_->column(i).name()] = tensor; + (*map_row)[data_schema_->Column(i).Name()] = tensor; } return Status::OK(); } @@ -402,59 +402,59 @@ Status AlbumOp::LoadTensorRowByIndex(int index, const std::string &file, const n std::unordered_map> *map_row) { int i = index; // special case to handle - if (data_schema_->column(i).name() == "id") { + if (data_schema_->Column(i).name() == "id") { // id is internal, special case to load from file TensorPtr tensor; RETURN_IF_NOT_OK(LoadIDTensor(file, i, &tensor)); - (*map_row)[data_schema_->column(i).name()] = tensor; + (*map_row)[data_schema_->Column(i).Name()] = tensor; } // find if 
-  if (js.find(data_schema_->column(i).name()) == js.end()) {
+  if (js.find(data_schema_->Column(i).Name()) == js.end()) {
     // iterator not found, push nullptr as placeholder
-    MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << ".";
+    MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->Column(i).Name() << ".";
     TensorPtr tensor;
     RETURN_IF_NOT_OK(LoadEmptyTensor(i, &tensor));
-    (*map_row)[data_schema_->column(i).name()] = tensor;
+    (*map_row)[data_schema_->Column(i).Name()] = tensor;
   }
-  nlohmann::json column_value = js.at(data_schema_->column(i).name());
-  MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << ".";
+  nlohmann::json column_value = js.at(data_schema_->Column(i).Name());
+  MS_LOG(INFO) << "This column is: " << data_schema_->Column(i).Name() << ".";
   bool is_array = column_value.is_array();
   // load single string
-  if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) {
+  if (column_value.is_string() && data_schema_->Column(i).Type() == DataType::DE_STRING) {
     TensorPtr tensor;
     RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, &tensor));
-    (*map_row)[data_schema_->column(i).name()] = tensor;
+    (*map_row)[data_schema_->Column(i).Name()] = tensor;
   }
   // load string array
-  if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) {
+  if (is_array && data_schema_->Column(i).Type() == DataType::DE_STRING) {
     TensorPtr tensor;
     RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, &tensor));
-    (*map_row)[data_schema_->column(i).name()] = tensor;
+    (*map_row)[data_schema_->Column(i).Name()] = tensor;
   }
   // load image file
-  if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) {
+  if (column_value.is_string() && data_schema_->Column(i).Type() != DataType::DE_STRING) {
     std::string image_file_path = column_value;
     TensorPtr tensor;
     RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, &tensor));
-    (*map_row)[data_schema_->column(i).name()] = tensor;
+    (*map_row)[data_schema_->Column(i).Name()] = tensor;
     uint32_t orientation = GetOrientation(image_file_path);
     TensorPtr scalar_tensor;
     RETURN_IF_NOT_OK(Tensor::CreateScalar(orientation, &scalar_tensor));
     (*map_row)["orientation"] = scalar_tensor;
   }
   // load float value
-  if (!is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
-                    data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
+  if (!is_array && (data_schema_->Column(i).Type() == DataType::DE_FLOAT32 ||
+                    data_schema_->Column(i).Type() == DataType::DE_FLOAT64)) {
     TensorPtr tensor;
     RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, &tensor));
-    (*map_row)[data_schema_->column(i).name()] = tensor;
+    (*map_row)[data_schema_->Column(i).Name()] = tensor;
   }
   // load float array
-  if (is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
-                   data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
+  if (is_array && (data_schema_->Column(i).Type() == DataType::DE_FLOAT32 ||
+                   data_schema_->Column(i).Type() == DataType::DE_FLOAT64)) {
     TensorPtr tensor;
     RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, i, &tensor));
-    (*map_row)[data_schema_->column(i).name()] = tensor;
+    (*map_row)[data_schema_->Column(i).Name()] = tensor;
   }
   RETURN_IF_NOT_OK(LoadIntTensorRowByIndex(i, is_array, column_value, map_row));
@@ -487,7 +487,7 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file,
   // loop over each column descriptor, this can optimized by switch cases
   for (int32_t i = 0; i < columns; i++) {
-    if (!IsReadColumn(data_schema_->column(i).name())) {
+    if (!IsReadColumn(data_schema_->Column(i).Name())) {
       continue;
     }
     RETURN_IF_NOT_OK(LoadTensorRowByIndex(i, file, js, map_row));
diff --git a/tests/ut/cpp/dataset/common/bboxop_common.cc b/tests/ut/cpp/dataset/common/bboxop_common.cc
index 70c794856c0..18819b9a88f 100644
--- a/tests/ut/cpp/dataset/common/bboxop_common.cc
+++ b/tests/ut/cpp/dataset/common/bboxop_common.cc
@@ -67,8 +67,8 @@ void BBoxOpCommon::GetInputImagesAndAnnotations(const std::string &dir, std::siz
     EXPECT_TRUE(dir_path.Exists());
   }
   // get image file paths
-  while (image_dir_itr->hasNext()) {
-    Path image_path = image_dir_itr->next();
+  while (image_dir_itr->HasNext()) {
+    Path image_path = image_dir_itr->Next();
     if (image_path.Extension() == std::string(kImageExt)) {
       paths_to_fetch.push_back(image_path.toString());
     }
diff --git a/tests/ut/cpp/dataset/data_helper_test.cc b/tests/ut/cpp/dataset/data_helper_test.cc
index b1ffefe6b71..5600e479a0f 100644
--- a/tests/ut/cpp/dataset/data_helper_test.cc
+++ b/tests/ut/cpp/dataset/data_helper_test.cc
@@ -50,7 +50,7 @@ TEST_F(MindDataTestDataHelper, MindDataTestHelper) {
   std::string file_path = datasets_root_path_ + "/testAlbum/images/1.json";
   DataHelper dh;
   std::vector<std::string> new_label = {"3", "4"};
-  Status rc = dh.UpdateArray(file_path, "label", new_label);
+  Status rc = dh.UpdateArray(file_path, "label", new_label);
   if (rc.IsError()) {
     MS_LOG(ERROR) << "Return code error detected during label update: " << ".";
     EXPECT_TRUE(false);
diff --git a/tests/ut/cpp/dataset/path_test.cc b/tests/ut/cpp/dataset/path_test.cc
index b36b38bbc70..9c215f3632d 100644
--- a/tests/ut/cpp/dataset/path_test.cc
+++ b/tests/ut/cpp/dataset/path_test.cc
@@ -35,8 +35,8 @@ TEST_F(MindDataTestPath, Test1) {
   auto dir_it = Path::DirIterator::OpenDirectory(&f);
   ASSERT_NE(dir_it.get(), nullptr);
   int i = 0;
-  while (dir_it->hasNext()) {
-    Path v = dir_it->next();
+  while (dir_it->HasNext()) {
+    Path v = dir_it->Next();
     MS_LOG(DEBUG) << v.toString() << "\n";
     i++;
     if (i == 10) {
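
Note: the Path::DirIterator methods renamed in the test hunks above (hasNext()/next() becoming HasNext()/Next()) are used exactly as in the updated tests. The snippet below is a minimal usage sketch, not part of the patch; the include path, namespace alias, and function name are assumptions for illustration.

// Minimal sketch (not part of the patch): directory iteration with the renamed API.
#include <iostream>
#include <string>

#include "minddata/dataset/util/path.h"  // assumed header location for Path / DirIterator

namespace ds = mindspore::dataset;

void PrintDirectoryEntries(const std::string &dir) {
  ds::Path f(dir);
  auto dir_it = ds::Path::DirIterator::OpenDirectory(&f);
  if (dir_it == nullptr) {
    return;  // directory missing or not readable
  }
  // HasNext()/Next() replace the old hasNext()/next() spellings.
  while (dir_it->HasNext()) {
    ds::Path entry = dir_it->Next();
    std::cout << entry.toString() << "\n";
  }
}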