forked from mindspore-Ecosystem/mindspore

fix minddata code clean

This commit is contained in:
parent 5503ce674c
commit 528a1e8d3d

@@ -592,14 +592,16 @@ SchemaObj::SchemaObj(const std::vector<char> &schema_file) : data_(std::make_sha
 // SchemaObj Init function
 Status SchemaObj::Init() {
-  if (!data_->schema_file_.empty()) {
-    Path schema_file(data_->schema_file_);
+  if (data_ != nullptr && !data_->schema_file_.empty()) {
+    std::string real_path;
+    RETURN_IF_NOT_OK(Path::RealPath(data_->schema_file_, real_path));
+    Path schema_file(real_path);
     CHECK_FAIL_RETURN_UNEXPECTED(schema_file.Exists(),
                                  "The file " + data_->schema_file_ + " does not exist or permission denied!");

     nlohmann::json js;
     try {
-      std::ifstream in(data_->schema_file_);
+      std::ifstream in(real_path);
       in >> js;
       CHECK_FAIL_RETURN_UNEXPECTED(js.find("columns") != js.end(),
                                    "\"columns\" node is required in the schema json file.");

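The rewritten `SchemaObj::Init()` resolves the user-supplied schema path to a canonical real path before the file is opened, so both the existence check and the read operate on a validated path. A minimal sketch of that "resolve, then open" pattern, with `std::filesystem::canonical` standing in for MindData's `Path::RealPath` helper (the function name `LoadSchemaText` is hypothetical):

```cpp
#include <filesystem>
#include <fstream>
#include <iostream>
#include <iterator>
#include <string>

// Resolve the path first; canonical() fails if the file does not exist or a
// path component is invalid, which is what Path::RealPath guards against.
bool LoadSchemaText(const std::string &user_path, std::string *contents) {
  if (contents == nullptr || user_path.empty()) {
    return false;  // mirrors the added data_/empty-path guards
  }
  std::error_code ec;
  std::filesystem::path real_path = std::filesystem::canonical(user_path, ec);
  if (ec) {
    std::cerr << "The file " << user_path << " does not exist or permission denied!\n";
    return false;
  }
  std::ifstream in(real_path);  // open the resolved path, not the raw input
  if (!in) {
    return false;
  }
  contents->assign(std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>());
  return true;
}
```
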
@@ -27,7 +27,8 @@ Iterator::~Iterator() { Stop(); }
 // Get the next row from the data pipeline.
 Status Iterator::GetNextRowCharIF(MSTensorMapChar *row) {
-  // Clean data row
+  RETURN_UNEXPECTED_IF_NULL(row);
+  // Clean data buffer
   row->clear();
   std::unordered_map<std::string, std::shared_ptr<dataset::Tensor>> md_map;
   Status rc = consumer_->GetNextAsMap(&md_map);
@@ -47,6 +48,7 @@ Status Iterator::GetNextRowCharIF(MSTensorMapChar *row) {
 // Get the next row from the data pipeline.
 Status Iterator::GetNextRow(MSTensorVec *row) {
   // Clean data row
+  RETURN_UNEXPECTED_IF_NULL(row);
   row->clear();
   // create a dataset tensor row and fetch. Then we convert the output to MSTensor
   std::vector<std::shared_ptr<dataset::Tensor>> md_row;
@@ -84,6 +86,7 @@ Status Iterator::BuildAndLaunchTree(std::shared_ptr<Dataset> ds, int32_t num_epo
 PullIterator::PullIterator() : pull_consumer_(nullptr) {}
 // Get the next row from the data pipeline.
 Status PullIterator::GetRows(int32_t num_rows, std::vector<MSTensorVec> *const row) {
+  RETURN_UNEXPECTED_IF_NULL(row);
   for (int i = 0; i < num_rows; i++) {
     std::vector<std::shared_ptr<dataset::Tensor>> md_row;
     Status rc = pull_consumer_->GetNextAsVector(&md_row);
@@ -105,6 +108,7 @@ Status PullIterator::GetRows(int32_t num_rows, std::vector<MSTensorVec> *const r
 }

 Status PullIterator::GetNextRow(MSTensorVec *const row) {
+  RETURN_UNEXPECTED_IF_NULL(row);
   CHECK_FAIL_RETURN_UNEXPECTED(pull_consumer_ != nullptr, "Consumer is nullptr.");
   std::vector<std::shared_ptr<dataset::Tensor>> md_row;
   Status rc = pull_consumer_->GetNextAsVector(&md_row);

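Every change in this file is the same fix: guard the output parameter before it is dereferenced, and return an error status instead of crashing. A self-contained sketch of the macro pattern, with simplified stand-ins for MindData's `Status` type and `RETURN_UNEXPECTED_IF_NULL` macro:

```cpp
#include <iostream>
#include <string>
#include <vector>

// Simplified stand-ins for MindData's Status type and null-guard macro.
struct Status {
  bool ok = true;
  std::string msg;
  static Status OK() { return {}; }
  static Status Error(std::string m) { return {false, std::move(m)}; }
};

#define RETURN_UNEXPECTED_IF_NULL(ptr)                        \
  do {                                                        \
    if ((ptr) == nullptr) {                                   \
      return Status::Error("Unexpected null pointer: " #ptr); \
    }                                                         \
  } while (false)

Status GetNextRow(std::vector<int> *row) {
  RETURN_UNEXPECTED_IF_NULL(row);  // guard before row->clear() below
  row->clear();
  row->push_back(42);  // stands in for fetching a row from the pipeline
  return Status::OK();
}

int main() {
  std::vector<int> row;
  std::cout << GetNextRow(&row).ok << ' ' << GetNextRow(nullptr).ok << '\n';  // prints: 1 0
}
```
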
@@ -107,6 +107,7 @@ Status ConfigManager::LoadFile(const std::string &settingsFile) {
     nlohmann::json js;
     in >> js;
     rc = FromJson(js);
+    in.close();
   } catch (const nlohmann::json::type_error &e) {
     std::ostringstream ss;
     ss << "Client file failed to load:\n" << e.what();

@@ -29,8 +29,10 @@ CVTensor::CVTensor(std::shared_ptr<Tensor> tensor) : Tensor(std::move(*tensor))
 }

 Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPtr *out) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   const CVTensorAlloc *alloc = GlobalContext::Instance()->cv_tensor_allocator();
   *out = std::allocate_shared<CVTensor>(*alloc, shape, type);
+  RETURN_UNEXPECTED_IF_NULL(out);
   int64_t byte_size = (*out)->SizeInBytes();
   // Don't allocate if we have a tensor with no elements.
   if (byte_size != 0) {
@@ -41,6 +43,7 @@ Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPt
 }

 Status CVTensor::CreateFromMat(const cv::Mat &mat, CVTensorPtr *out) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   TensorPtr out_tensor;
   cv::Mat mat_local = mat;
   // if the input Mat's memory is not continuous, copy it to one block of memory
@@ -70,6 +73,9 @@ std::pair<std::array<int, 2>, int> CVTensor::IsValidImage(const TensorShape &sha
 }

 std::shared_ptr<CVTensor> CVTensor::AsCVTensor(std::shared_ptr<Tensor> t) {
+  if (t == nullptr) {
+    return nullptr;
+  }
   std::shared_ptr<CVTensor> cv_t = std::dynamic_pointer_cast<CVTensor>(t);
   if (cv_t != nullptr) {
     return cv_t;
@@ -80,13 +86,13 @@ std::shared_ptr<CVTensor> CVTensor::AsCVTensor(std::shared_ptr<Tensor> t) {
 }

 Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType &type, cv::Mat *mat) {
-  std::pair<std::array<int, 2>, int> cv_shape_type = IsValidImage(shape, type);
+  RETURN_UNEXPECTED_IF_NULL(data);
+  RETURN_UNEXPECTED_IF_NULL(mat);
+  const int kShapeAsDefault = 2;
+  std::pair<std::array<int, kShapeAsDefault>, int> cv_shape_type = IsValidImage(shape, type);
   if (cv_shape_type.second == -1) {
     std::vector<dsize_t> sizes = shape.AsVector();
     std::vector<int> sizes32(sizes.begin(), sizes.end());  // convert long to int for usage with OpenCV
     if (static_cast<int>(shape.Rank()) != shape.Rank()) {
       RETURN_STATUS_UNEXPECTED("Error in creating CV mat. Wrong shape.");
     }

     uint8_t cv_type = type.AsCVType();
     if (cv_type == kCVInvalidType) {
@@ -94,7 +100,7 @@ Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType &
     }
     *mat = cv::Mat(static_cast<int>(shape.Rank()), &sizes32[0], cv_type, data);
   } else {
-    *mat = cv::Mat(2, &(cv_shape_type.first[0]), cv_shape_type.second, data);
+    *mat = cv::Mat(kShapeAsDefault, &(cv_shape_type.first[0]), cv_shape_type.second, data);
   }
   return Status::OK();
 }
@@ -113,10 +119,14 @@ Status CVTensor::ExpandDim(const dsize_t &axis) {

 void CVTensor::Squeeze() {
   Tensor::Squeeze();
-  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
+  Status rc = this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
+  if (rc.IsError()) {
+    MS_LOG(ERROR) << "Squeeze failed, error details is " << rc;
+  }
 }

 Status CVTensor::MatAtIndex(const std::vector<dsize_t> &index, cv::Mat *mat) {
+  RETURN_UNEXPECTED_IF_NULL(mat);
   uchar *start = nullptr;
   TensorShape remaining({-1});
   RETURN_IF_NOT_OK(this->StartAddrOfIndex(index, &start, &remaining));

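Two of the hunks above replace a `(void)`-discarded call with an explicit `Status` check plus an error log, which matters in `Squeeze()` because the function itself returns `void` and logging is the only way to surface the failure. The shape of that pattern, with a minimal hypothetical `Status` stand-in:

```cpp
#include <iostream>
#include <string>

// Minimal stand-in for MindData's Status, just enough to show the pattern.
struct Status {
  bool error = false;
  std::string msg;
  bool IsError() const { return error; }
};

Status MatInit() { return {true, "wrong shape"}; }  // hypothetical failing call

// Before: (void)MatInit();  -- a failure was silently dropped.
// After: capture the Status and log when it is an error.
void Squeeze() {
  Status rc = MatInit();
  if (rc.IsError()) {
    std::cerr << "Squeeze failed, error details is " << rc.msg << '\n';
  }
}

int main() { Squeeze(); }
```
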
@@ -143,15 +143,15 @@ class DataType {
   constexpr bool operator!=(const Type a) const { return type_ != a; }

   // Disable this usage `if(d)` where d is of type DataType
-  /// \return
+  /// \return return nothing since we disable this function.
   operator bool() = delete;

   // To be used in Switch/case
-  /// \return
+  /// \return data type internal.
   operator Type() const { return type_; }

   // The number of bytes needed to store one value of this type
-  /// \return
+  /// \return the number of bytes of the type.
   uint8_t SizeInBytes() const;

 #ifndef ENABLE_ANDROID

@@ -41,15 +41,17 @@ DETensor::DETensor(std::shared_ptr<dataset::DeviceTensor> device_tensor_impl, bo
     : device_tensor_impl_(device_tensor_impl), name_("MindDataDeviceTensor"), is_device_(is_device) {
   // The sequence of shape_ is (width, widthStride, height, heightStride) in Dvpp module
   // We need to add [1]widthStride and [3]heightStride, which are actual YUV image shape, into shape_ attribute
-  uint8_t flag = 0;
-  for (auto &i : device_tensor_impl->GetYuvStrideShape()) {
-    if (flag % 2 == 1) {
-      int64_t j = static_cast<int64_t>(i);
-      shape_.emplace_back(j);
+  if (device_tensor_impl && device_tensor_impl->GetYuvStrideShape().size() > 0) {
+    uint8_t flag = 0;
+    for (auto &i : device_tensor_impl->GetYuvStrideShape()) {
+      if (flag % 2 == 1) {
+        int64_t j = static_cast<int64_t>(i);
+        shape_.emplace_back(j);
+      }
+      ++flag;
     }
-    ++flag;
+    std::reverse(shape_.begin(), shape_.end());
   }
-  std::reverse(shape_.begin(), shape_.end());
   MS_LOG(INFO) << "This is a YUV420 format image, one pixel takes 1.5 bytes. Therefore, the shape of"
               << " image is in (H, W) format. You can search for more information about YUV420 format";
 }

@@ -23,7 +23,10 @@

 namespace mindspore {
 namespace dataset {
-DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) : Tensor(shape, type) {
+const int kYuvDefaultChannels = 4;
+
+DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type)
+    : Tensor(shape, type), device_data_(nullptr), size_(0) {
   // grab the mem pool from global context and create the allocator for char data area
   std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool();
   data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
@@ -34,6 +37,7 @@ DeviceTensor::DeviceTensor(const TensorShape &shape, const DataType &type) : Ten
 Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type, std::shared_ptr<DeviceTensor> *out) {
   CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape.");
   CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type.");
+  CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Invalid nullptr pointer.");
   const DeviceTensorAlloc *alloc = GlobalContext::Instance()->device_tensor_allocator();
   *out = std::allocate_shared<DeviceTensor>(*alloc, shape, type);
   // if it's a string tensor and it has no elements, Just initialize the shape and type.
@@ -42,6 +46,7 @@ Status DeviceTensor::CreateEmpty(const TensorShape &shape, const DataType &type,
   }

   CHECK_FAIL_RETURN_UNEXPECTED(type.IsNumeric(), "Number of elements is not 0. The type should be numeric.");
+  CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");

   int64_t bytes = (*out)->SizeInBytes();
   // Don't allocate if we have a tensor with no elements.
@@ -58,9 +63,11 @@ Status DeviceTensor::CreateFromDeviceMemory(const TensorShape &shape, const Data
   CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type.");
   CHECK_FAIL_RETURN_UNEXPECTED(data_ptr != nullptr, "Data pointer is NULL");
   CHECK_FAIL_RETURN_UNEXPECTED(dataSize > 0, "Invalid data size");
+  CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Out pointer is NULL");

   const DeviceTensorAlloc *alloc = GlobalContext::Instance()->device_tensor_allocator();
   *out = std::allocate_shared<DeviceTensor>(*alloc, shape, type);
+  CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");

   // if it's a string tensor and it has no elements, Just initialize the shape and type.
   if (!type.IsNumeric() && shape.NumOfElements() == 0) {
@@ -76,6 +83,8 @@ Status DeviceTensor::CreateFromDeviceMemory(const TensorShape &shape, const Data
     RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));
   }

+  CHECK_FAIL_RETURN_UNEXPECTED(attributes.size() >= kYuvDefaultChannels,
+                               "Invalid attributes size, should be greater than 4.");
   CHECK_FAIL_RETURN_UNEXPECTED(
     (*out)->SetAttributes(data_ptr, dataSize, attributes[0], attributes[1], attributes[2], attributes[3]),
     "Fail to set attributes for DeviceTensor");
@@ -129,6 +138,7 @@ Status DeviceTensor::SetSize_(const uint32_t &new_size) {

 #ifdef ENABLE_ACL
 Status DeviceTensor::DataPop_(std::shared_ptr<Tensor> *host_tensor) {
+  CHECK_FAIL_RETURN_UNEXPECTED(host_tensor != nullptr, "host tensor pointer is NULL.");
   void *resHostBuf = nullptr;
   APP_ERROR ret = aclrtMallocHost(&resHostBuf, this->DeviceDataSize());
   if (ret != APP_ERR_OK) {
@@ -151,13 +161,18 @@ Status DeviceTensor::DataPop_(std::shared_ptr<Tensor> *host_tensor) {

   mindspore::dataset::dsize_t dvppDataSize = this->DeviceDataSize();
   const mindspore::dataset::TensorShape dvpp_shape({dvppDataSize, 1, 1});

+  CHECK_FAIL_RETURN_UNEXPECTED(this->GetYuvStrideShape().size() >= kYuvDefaultChannels,
+                               "Invalid YuvShape, should greater than 4");
+
   uint32_t _output_width_ = this->GetYuvStrideShape()[0];
   uint32_t _output_widthStride_ = this->GetYuvStrideShape()[1];
   uint32_t _output_height_ = this->GetYuvStrideShape()[2];
   uint32_t _output_heightStride_ = this->GetYuvStrideShape()[3];
   const mindspore::dataset::DataType dvpp_data_type(mindspore::dataset::DataType::DE_UINT8);

-  mindspore::dataset::Tensor::CreateFromMemory(dvpp_shape, dvpp_data_type, ret_ptr, host_tensor);
+  RETURN_IF_NOT_OK(mindspore::dataset::Tensor::CreateFromMemory(dvpp_shape, dvpp_data_type, ret_ptr, host_tensor));
+  CHECK_FAIL_RETURN_UNEXPECTED(host_tensor != nullptr, "Allocate memory failed.");
+
   (*host_tensor)->SetYuvShape(_output_width_, _output_widthStride_, _output_height_, _output_heightStride_);
   if (!(*host_tensor)->HasData()) {

@@ -39,7 +39,9 @@ struct npy_scalar_caster {
   bool load(handle src, bool convert) {
     // Taken from Eigen casters. Permits either scalar dtype or scalar array.
     handle type = dtype::of<T>().attr("type");  // Could make more efficient.
-    if (!convert && !isinstance<Array>(src) && !isinstance(src, type)) return false;
+    if (!convert && !isinstance<Array>(src) && !isinstance(src, type)) {
+      return false;
+    }

     Array tmp = Array::ensure(src);
     if (tmp && tmp.size() == 1 && tmp.ndim() == 0) {

@@ -91,8 +91,10 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept {
 Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out) {
   CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape.");
   CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type.");
+  RETURN_UNEXPECTED_IF_NULL(out);
   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
   *out = std::allocate_shared<Tensor>(*alloc, shape, type);
+  CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
   // if it's a string tensor and it has no elements, Just initialize the shape and type.
   if (!type.IsNumeric() && shape.NumOfElements() == 0) {
     return Status::OK();
@@ -110,7 +112,7 @@ Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, Tenso
 }
 Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) {
   RETURN_IF_NOT_OK(CreateEmpty(shape, type, out));
-  if (src != nullptr) {
+  if (src != nullptr && out != nullptr) {
     // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
     int64_t byte_size = (*out)->SizeInBytes();
     if (byte_size == 0) {
@@ -129,10 +131,11 @@ Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type,

 Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const unsigned char *src,
                                 const dsize_t &length, TensorPtr *out) {
-  CHECK_FAIL_RETURN_UNEXPECTED(src != nullptr, "Pointer to source data is null.");
+  RETURN_UNEXPECTED_IF_NULL(src);
+  RETURN_UNEXPECTED_IF_NULL(out);
   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
   *out = std::allocate_shared<Tensor>(*alloc, shape, type);

+  RETURN_UNEXPECTED_IF_NULL(out);
   if (type.IsNumeric()) {
     dsize_t calculated_length = (*out)->SizeInBytes();
     CHECK_FAIL_RETURN_UNEXPECTED(calculated_length == length, "Length of source data does not match the shape.");
@@ -160,6 +163,7 @@ Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type,

 #ifdef ENABLE_PYTHON
 Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   std::vector<dsize_t> shape;
   for (dsize_t i = 0; i < arr.ndim(); i++) {
     shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
@@ -168,9 +172,11 @@ Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
   std::vector<std::string> strings;

   if (arr.dtype().kind() == 'U') {
-    std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast<py::str>(s)); });
+    (void)std::for_each(arr.begin(), arr.end(),
+                        [&strings](const auto &s) { strings.emplace_back(py::cast<py::str>(s)); });
   } else {
-    std::for_each(arr.begin(), arr.end(), [&strings](const auto &s) { strings.emplace_back(py::cast<py::bytes>(s)); });
+    (void)std::for_each(arr.begin(), arr.end(),
+                        [&strings](const auto &s) { strings.emplace_back(py::cast<py::bytes>(s)); });
   }

   arr.resize(shape);  // resize arr back to the original shape
@@ -179,6 +185,7 @@ Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
 }

 Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *out) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   if (DataType::FromNpArray(arr) == DataType::DE_STRING) {
     return CreateFromNpString(arr, out);
   }
@@ -192,7 +199,7 @@ Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *
     shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
     strides.push_back(static_cast<dsize_t>(arr.strides()[i]));
     // in case of empty array num_items=0
-    if (count != 0) {
+    if (count != 0 && shape.size() > i && shape[i] != 0) {
       count /= shape[i];
       if (strides[i] != arr.itemsize() * count) {
         is_strided = true;
@@ -214,9 +221,11 @@ Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *

 #ifndef ENABLE_ANDROID
 Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
   *out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(bytes_list.value_size())}),
                                       DataType(DataType::DE_STRING));
+  CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
   // total bytes needed = offset array + strings
   // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
   // strings will be null-terminated --> need 1 extra byte per element
@@ -237,9 +246,7 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const
     num_bytes -= kOffsetSize;
     // insert actual string
     int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
-    if (ret_code != 0) {
-      MS_LOG(ERROR) << "Cannot copy string into Tensor";
-    }
+    CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Cannot copy string into Tensor");
     // next string will be stored right after the current one.
     offset = offset + str.length() + 1;
     // total bytes are reduced by the length of the string
@@ -258,6 +265,7 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const
 #endif

 Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *out) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   Path file(path);
   if (file.IsDirectory()) {
     RETURN_STATUS_UNEXPECTED("Invalid file found: " + path + ", should be file, but got directory.");
@@ -270,8 +278,10 @@ Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *
   CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Fail to find size of file, check path: " + path);
   RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out));
   int64_t written_bytes = fs.read(reinterpret_cast<char *>((*out)->GetMutableBuffer()), num_bytes).gcount();
-  CHECK_FAIL_RETURN_UNEXPECTED(written_bytes == num_bytes && fs.good(),
-                               "Error in writing to tensor, check path: " + path);
+  if (!(written_bytes == num_bytes && fs.good())) {
+    fs.close();
+    RETURN_STATUS_UNEXPECTED("Error in writing to tensor, check path: " + path);
+  }
   fs.close();
   return Status::OK();
 }
@@ -279,8 +289,10 @@ Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *
 #ifndef ENABLE_ANDROID
 Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
                                   const DataType &type, dsize_t pad_size, TensorPtr *out) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, type, out));

+  RETURN_UNEXPECTED_IF_NULL(out);
   unsigned char *current_tensor_addr = (*out)->GetMutableBuffer();
   int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size;

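`CreateFromFile` now closes the stream on the short-read path before returning the error, instead of failing out of the macro with the handle still open. A sketch of the same read-and-verify flow on a plain buffer (an `std::ifstream` would also close itself on destruction; the explicit `close()` keeps the success and failure paths symmetric):

```cpp
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

// Read a whole file, verifying that the byte count matches what tellg
// reported. std::vector stands in for the destination tensor buffer.
bool ReadAll(const std::string &path, std::vector<char> *out) {
  if (out == nullptr) {
    return false;
  }
  std::ifstream fs(path, std::ios::binary | std::ios::ate);
  if (!fs) {
    return false;
  }
  std::streamsize num_bytes = fs.tellg();
  fs.seekg(0, std::ios::beg);
  out->resize(static_cast<size_t>(num_bytes));
  std::streamsize got = fs.read(out->data(), num_bytes).gcount();
  if (!(got == num_bytes && fs.good())) {
    fs.close();  // release the handle before reporting the error
    return false;
  }
  fs.close();
  return true;
}

int main() {
  std::vector<char> buf;
  std::cout << ReadAll("/nonexistent/file", &buf) << '\n';  // prints: 0
}
```
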
@@ -314,18 +326,23 @@ Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const
 // Here we convert array C to array A, by memcpy index by index (Note that not all elements in C is copied)
 Status Tensor::CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
                                 std::vector<dsize_t> strides, uint8_t type_size) {
+  RETURN_UNEXPECTED_IF_NULL(dst);
+  RETURN_UNEXPECTED_IF_NULL(src);
   dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>());
   for (dsize_t i = 0; i < size; ++i) {
     dsize_t offset = 0;
     dsize_t count = i;
     for (size_t j = 0; j < shape.size(); ++j) {
       // convert 1d array's index to 3d array's index (A -> B)
+      CHECK_FAIL_RETURN_UNEXPECTED(shape[shape.size() - 1 - j] != 0, "Invalid data, shape can't be zero.");
       dsize_t idx = count % shape[shape.size() - 1 - j];
       count /= shape[shape.size() - 1 - j];
       // calculate the raw data offset based on strides (B -> C)
       offset += idx * strides[shape.size() - 1 - j];
       // once count = 0, the following idxes are all zero, skip them
-      if (count == 0) break;
+      if (count == 0) {
+        break;
+      }
     }
     // strides already consider byte size of the data type, but dst doesn't.
     // dst[i] = dst + i * type_size = src + offset
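The `CopyStridedArray` hunk adds two null guards and a divide-by-zero check to index arithmetic that decomposes each flat destination index into per-dimension indices and maps them through the source strides. A standalone sketch of that arithmetic (types and names simplified from the original):

```cpp
#include <cstdint>
#include <cstring>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

// For each flat index i of the contiguous destination, recover the
// multi-dimensional index and use the source's byte strides to locate the
// element in the (possibly padded) source buffer.
bool CopyStrided(uint8_t *dst, const uint8_t *src, const std::vector<int64_t> &shape,
                 const std::vector<int64_t> &strides, uint8_t type_size) {
  if (dst == nullptr || src == nullptr) return false;  // the added null guards
  int64_t size = std::accumulate(shape.begin(), shape.end(), int64_t{1}, std::multiplies<>());
  for (int64_t i = 0; i < size; ++i) {
    int64_t offset = 0;
    int64_t count = i;
    for (size_t j = 0; j < shape.size(); ++j) {
      int64_t dim = shape[shape.size() - 1 - j];
      if (dim == 0) return false;  // the added divide-by-zero guard
      offset += (count % dim) * strides[shape.size() - 1 - j];
      count /= dim;
      if (count == 0) {
        break;  // remaining indices are all zero
      }
    }
    std::memcpy(dst + i * type_size, src + offset, type_size);
  }
  return true;
}

int main() {
  // 2x2 int8 source stored with a padded row stride of 3 bytes.
  uint8_t src[6] = {1, 2, 0, 3, 4, 0};
  uint8_t dst[4] = {};
  CopyStrided(dst, src, {2, 2}, {3, 1}, 1);
  std::cout << int(dst[0]) << int(dst[1]) << int(dst[2]) << int(dst[3]) << '\n';  // prints: 1234
}
```
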
@@ -483,6 +500,7 @@ void Tensor::Invalidate() {

 template <typename T>
 Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
+  RETURN_UNEXPECTED_IF_NULL(ptr);
   if (type_.IsCompatible<T>()) {
     if (data_ == nullptr) {
       std::string err = "Data is not allocated yet";
@@ -491,6 +509,7 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
     dsize_t flat_idx;
     RETURN_IF_NOT_OK(shape_.ToFlatIndex(index, &flat_idx));
     *ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes());
+    RETURN_UNEXPECTED_IF_NULL(ptr);

     return Status::OK();
   } else {
@@ -500,6 +519,8 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
 }

 Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset_t *length) const {
+  RETURN_UNEXPECTED_IF_NULL(ptr);
+  RETURN_UNEXPECTED_IF_NULL(length);
   if (type_ == DataType::DE_STRING) {
     if (data_ == nullptr) {
       std::string err = "Data is not allocated yet";
@@ -520,6 +541,8 @@ Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset
 }

 Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining) {
+  RETURN_UNEXPECTED_IF_NULL(start_addr_of_index);
+  RETURN_UNEXPECTED_IF_NULL(remaining);
   if (type() == DataType::DE_STRING) {
     RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string tensors yet.");
   }
@@ -542,6 +565,7 @@ Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_

 Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor,
                             const bool partial_insert) {
+  RETURN_UNEXPECTED_IF_NULL(tensor);
   std::string err_msg;
   if (partial_insert) {
     err_msg += (ind.size() != 1)
@@ -604,13 +628,14 @@ Status Tensor::ExpandDim(const dsize_t &axis) {
 std::vector<dsize_t> Tensor::Strides() const {
   std::vector<dsize_t> strides = shape_.Strides();
   uint8_t size = type_.SizeInBytes();
-  std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
+  (void)std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
   return strides;
 }

 #ifdef ENABLE_PYTHON
 Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
+  RETURN_UNEXPECTED_IF_NULL(t);
+  RETURN_UNEXPECTED_IF_NULL(out);
   CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");

   std::string format_desc = t->type().GetPybindFormat();
@@ -623,6 +648,7 @@ Status Tensor::GetBufferInfo(Tensor *t, py::buffer_info *out) {
                            t->Rank(),             /* Number of dimensions */
                            t->shape().AsVector(), /* Buffer dimensions */
                            t->Strides());
+  RETURN_UNEXPECTED_IF_NULL(out);
   return Status::OK();
 }
 #endif
@@ -640,6 +666,7 @@ Status Tensor::to_json(nlohmann::json *out_json) {

 template <typename T>
 Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
+  RETURN_UNEXPECTED_IF_NULL(o);
   if (data_ == nullptr) {
     RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
   }
@@ -713,6 +740,7 @@ Status Tensor::GetDataAsNumpy(py::array *data) {
   return Status::OK();
 }
 Status Tensor::GetDataAsNumpyStrings(py::array *data) {
+  RETURN_UNEXPECTED_IF_NULL(data);
   auto itr = begin<std::string_view>();
   uint64_t max_value = 0;
   for (; itr != end<std::string_view>(); ++itr) {
@@ -726,7 +754,9 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) {
   max_value = (max_value == 0 ? 1 : max_value);
   uint64_t total_size = shape_.NumOfElements() * max_value;
   char *tmp_data = reinterpret_cast<char *>(data_allocator_->allocate(total_size));
-  if (tmp_data == nullptr) RETURN_STATUS_UNEXPECTED("Cannot create temp array.");
+  if (tmp_data == nullptr) {
+    RETURN_STATUS_UNEXPECTED("Cannot create temp array.");
+  }
   int ret_code = memset_s(tmp_data, total_size, 0, total_size);
   CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to initialize temp memory");

@@ -739,9 +769,10 @@ Status Tensor::GetDataAsNumpyStrings(py::array *data) {
     }
   }
   auto strides = shape_.Strides();
-  std::transform(strides.begin(), strides.end(), strides.begin(),
-                 [&max_value](const auto &s) { return s * max_value; });
+  (void)std::transform(strides.begin(), strides.end(), strides.begin(),
+                       [&max_value](const auto &s) { return s * max_value; });
   *data = py::array(py::dtype("S" + std::to_string(max_value)), shape_.AsVector(), strides, tmp_data);
+  RETURN_UNEXPECTED_IF_NULL(data);
   data_allocator_->deallocate(reinterpret_cast<uchar *>(tmp_data));
   return Status::OK();
 }
@@ -751,6 +782,7 @@ void Tensor::Squeeze() { shape_ = shape_.Squeeze(); }

 template <typename T>
 Status Tensor::GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const {
+  RETURN_UNEXPECTED_IF_NULL(o);
   if (data_ == nullptr) {
     RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
   }
@@ -792,6 +824,7 @@ Status Tensor::GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const {

 template <typename T>
 Status Tensor::GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const {
+  RETURN_UNEXPECTED_IF_NULL(o);
   if (data_ == nullptr) {
     RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
   }
@@ -833,6 +866,7 @@ Status Tensor::GetSignedIntAt(T *o, const std::vector<dsize_t> &index) const {

 template <typename T>
 Status Tensor::GetFloatAt(T *o, const std::vector<dsize_t> &index) const {
+  RETURN_UNEXPECTED_IF_NULL(o);
   if (data_ == nullptr) {
     RETURN_STATUS_UNEXPECTED("Data is not allocated yet");
   }
@@ -879,6 +913,7 @@ Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length
   return Status::OK();
 }
 Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index) {
+  RETURN_UNEXPECTED_IF_NULL(src);
   CHECK_FAIL_RETURN_UNEXPECTED(src->type() == type_, "Source Tensor has a different type");
   CHECK_FAIL_RETURN_UNEXPECTED(index.back() == 0, "Last dim in index should be 0");

@@ -896,6 +931,7 @@ Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vect

 Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &slice_index,
                               SliceOption *slice_option_ptr) {
+  RETURN_UNEXPECTED_IF_NULL(slice_option_ptr);
   if (slice_option.indices_.empty() && !slice_option.slice_.valid()) {
     RETURN_STATUS_UNEXPECTED("Both indices and slices can not be empty.");
   }
@@ -904,6 +940,7 @@ Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &sl
     RETURN_STATUS_UNEXPECTED("Both indices and slices can not be given.");
   }

+  CHECK_FAIL_RETURN_UNEXPECTED(shape_.Size() > slice_index, "Invalid shape, should greater than slices index.");
   // if slice object was provided, indices should be empty. Generate indices from the slice object.
   if (slice_option.indices_.empty()) {
     // check if slice is valid
@@ -931,6 +968,7 @@ Status Tensor::GetSliceOption(const SliceOption &slice_option, const int32_t &sl
 }

 Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption> slice_options_) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   std::vector<SliceOption> converted_slice_objects;

   CHECK_FAIL_RETURN_UNEXPECTED(slice_options_.size() <= static_cast<size_t>(std::numeric_limits<dsize_t>::max()),
@@ -967,7 +1005,7 @@ Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption
   for (int i = 0; i < shape_.Rank(); i++) {
     if (i < slice_len) {
       // if it's a slice
-      if (converted_slice_objects[i].indices_.size() == 0) {
+      if (converted_slice_objects[i].indices_.size() == 0 && converted_slice_objects[i].slice_.step_ != 0) {
        slice_len_ind = (converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) /
                        converted_slice_objects[i].slice_.step_;
        if ((converted_slice_objects[i].slice_.stop_ - converted_slice_objects[i].slice_.start_) %
@@ -1006,8 +1044,10 @@ Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<SliceOption

 Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
                             const TensorShape &shape) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   RETURN_IF_NOT_OK(CreateEmpty(shape, type_, out));

+  RETURN_UNEXPECTED_IF_NULL(out);
   (*out)->GetMutableBuffer();
   dsize_t out_index = 0;
   std::vector<dsize_t> dim_length = shape_.AsVector();
@@ -1052,6 +1092,7 @@ Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<std:
 }
 Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<std::vector<dsize_t>> &indices,
                            const TensorShape &shape) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   std::vector<dsize_t> dim_length = shape_.AsVector();
   std::vector<std::string> strings;

@@ -399,6 +399,10 @@ class Tensor {
   /// \param[in] index_vector vector of indices
   /// \return std::vector<dsize_t> modified vector of indices
   static inline std::vector<dsize_t> HandleNegIndices(std::vector<dsize_t> index_vector, std::vector<dsize_t> length) {
+    if (length.size() < index_vector.size()) {
+      MS_LOG(ERROR) << "The size of length should be greater than the shape of index_vector";
+      return {};
+    }
     std::vector<dsize_t> indices(index_vector.size(), 0);
     for (int i = 0; i < index_vector.size(); i++) {
       indices[i] = HandleNeg(index_vector[i], length[i]);
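`HandleNegIndices` maps Python-style negative indices (`-1` meaning the last element) onto non-negative ones; the hunk adds the size check so a short `length` vector no longer causes an out-of-bounds read. A standalone sketch of the same normalization:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Normalize negative indices against the per-dimension lengths, returning an
// empty vector (as the hunk does) when there are fewer lengths than indices.
std::vector<int64_t> HandleNegIndices(const std::vector<int64_t> &index_vector,
                                      const std::vector<int64_t> &length) {
  if (length.size() < index_vector.size()) {
    std::cerr << "The size of length should be greater than the shape of index_vector\n";
    return {};
  }
  std::vector<int64_t> indices(index_vector.size(), 0);
  for (size_t i = 0; i < index_vector.size(); ++i) {
    int64_t idx = index_vector[i];
    indices[i] = idx < 0 ? idx + length[i] : idx;  // equivalent of HandleNeg
  }
  return indices;
}

int main() {
  for (int64_t v : HandleNegIndices({-1, 2, -3}, {5, 5, 5})) {
    std::cout << v << ' ';  // prints: 4 2 2
  }
}
```
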
@@ -765,12 +769,14 @@ inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>()
 template <>
 inline Status Tensor::CreateFromVector<std::string>(const std::vector<std::string> &items, const TensorShape &shape,
                                                     TensorPtr *out) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   CHECK_FAIL_RETURN_UNEXPECTED(
     items.size() == shape.NumOfElements(),
     "Number of elements in the vector does not match the number of elements of the shape required");
   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
   *out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(items.size())}),
                                       DataType(DataType::DE_STRING));
+  CHECK_FAIL_RETURN_UNEXPECTED(out != nullptr, "Allocate memory failed.");
   if (items.size() == 0) {
     if (shape.known()) {
       return (*out)->Reshape(shape);
@@ -820,6 +826,7 @@ inline Status Tensor::CreateFromVector<std::string>(const std::vector<std::strin
 /// \return Status code
 template <>
 inline Status Tensor::CreateScalar<std::string>(const std::string &item, TensorPtr *out) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   return CreateFromVector<std::string>({item}, TensorShape::CreateScalar(), out);
 }
 }  // namespace dataset

@@ -16,6 +16,8 @@
 #include <string>
 #include <vector>
 #include "minddata/dataset/core/tensor_helpers.h"
+#include "minddata/dataset/util/log_adapter.h"
+#include "minddata/dataset/util/status.h"

 namespace mindspore {
 namespace dataset {
@@ -23,6 +25,10 @@ namespace dataset {
 void IndexGeneratorHelper(int8_t depth, std::vector<dsize_t> *numbers,
                           const std::vector<mindspore::dataset::SliceOption> &slice_list,
                           std::vector<std::vector<dsize_t>> *matrix) {
+  if (numbers == nullptr || matrix == nullptr) {
+    MS_LOG(ERROR) << "Invalid input pointer, can't be NULL";
+    return;
+  }
   // for loop changes if its an index instead of a slice object
   if (depth > 0) {
     int8_t new_depth = depth - 1;

@@ -87,6 +87,7 @@ class TensorRow {
   /// \param[out] output TensorRow
   template <typename T>
   static Status ConvertToTensorRow(const std::vector<T> &o, TensorRow *output) {
+    RETURN_UNEXPECTED_IF_NULL(output);
     DataType data_type = DataType::FromCType<T>();
     if (data_type == DataType::DE_UNKNOWN) {
       RETURN_STATUS_UNEXPECTED("ConvertToTensorRow: Data type was not recognized.");
@@ -106,6 +107,7 @@ class TensorRow {
   /// \param[out] output TensorRow
   template <typename T>
   static Status ConvertToTensorRow(const T &o, TensorRow *output) {
+    RETURN_UNEXPECTED_IF_NULL(output);
     DataType data_type = DataType::FromCType<T>();
     if (data_type == DataType::DE_UNKNOWN) {
       RETURN_STATUS_UNEXPECTED("ConvertToTensorRow: Data type was not recognized.");
@@ -125,6 +127,7 @@ class TensorRow {
   /// \param[out] o the primitive variable
   template <typename T>
   static Status ConvertFromTensorRow(const TensorRow &input, T *o) {
+    RETURN_UNEXPECTED_IF_NULL(o);
     DataType data_type = DataType::FromCType<T>();
     RETURN_IF_NOT_OK(ValidateTensorRow(input, data_type));
     if (input.at(0)->type() != data_type) {
@@ -142,6 +145,7 @@ class TensorRow {
   /// \param[out] o vector of primitive variable
   template <typename T>
   static Status ConvertFromTensorRow(const TensorRow &input, std::vector<T> *o) {
+    RETURN_UNEXPECTED_IF_NULL(o);
     DataType data_type = DataType::FromCType<T>();
     RETURN_IF_NOT_OK(ValidateTensorRow(input, data_type));
     if (input.at(0)->Rank() != 1)

@@ -40,7 +40,7 @@ bool multi_ok(dsize_t x, dsize_t y) {
 }

 dsize_t TensorShape::NumOfElements() const {
-  if (!known()) {
+  if (!known() && strides_.size() < 1) {
     return 0;
   }
   return strides_[0];
@@ -216,12 +216,9 @@ py::list TensorShape::AsPyList() {
 #endif

 TensorShape TensorShape::Squeeze() const {
-  std::vector<dsize_t> new_shape;
-  for (auto s : AsVector()) {
-    if (s != 1) {
-      new_shape.push_back(s);
-    }
-  }
+  std::vector<dsize_t> new_shape(raw_shape_.size());
+  auto it = std::copy_if(raw_shape_.begin(), raw_shape_.end(), new_shape.begin(), [](auto s) { return s != 1; });
+  new_shape.resize(std::distance(new_shape.begin(), it));
   return TensorShape(new_shape);
 }

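The rewritten `TensorShape::Squeeze()` drops size-1 dimensions with `std::copy_if` into a pre-sized buffer and then trims the unused tail, avoiding the reallocation churn of repeated `push_back`. The same idiom on a plain vector:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <vector>

// Remove all size-1 dimensions: copy_if writes the survivors to the front of
// a fully pre-allocated vector, and resize() trims it to the written length.
std::vector<int64_t> Squeeze(const std::vector<int64_t> &raw_shape) {
  std::vector<int64_t> new_shape(raw_shape.size());
  auto it = std::copy_if(raw_shape.begin(), raw_shape.end(), new_shape.begin(),
                         [](int64_t s) { return s != 1; });
  new_shape.resize(std::distance(new_shape.begin(), it));
  return new_shape;
}

int main() {
  for (int64_t d : Squeeze({1, 3, 1, 2})) {
    std::cout << d << ' ';  // prints: 3 2
  }
}
```
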
@@ -230,6 +227,7 @@ std::vector<dsize_t> TensorShape::Strides() const { return std::vector<dsize_t>{
 // Name: ToFlatIndex()
 // Description: convert a vector style index to number, used to access memory internal use only
 Status TensorShape::ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_index) const {
+  RETURN_UNEXPECTED_IF_NULL(flat_index);
   if (index.size() != raw_shape_.size()) {
     std::stringstream ss;
     ss << "Index size (" << index.size() << ") does not match the shape size (" << raw_shape_.size() << ").";

@@ -101,8 +101,8 @@ Status CacheServerHW::GetNumaNodeInfo() {
   };
   // Look for name starts with 'node' and followed by digits.
   const char kNodeName[] = "node";
-  while (it->hasNext()) {
-    auto p = it->next();
+  while (it->HasNext()) {
+    auto p = it->Next();
     const std::string entry = p.Basename();
     const char *name = entry.data();
     if (strncmp(name, kNodeName, strlen(kNodeName)) == 0 && isdigit_string(name + strlen(kNodeName))) {

@@ -63,8 +63,8 @@ Status CachePool::DoServiceStop() {
   if (!root_.toString().empty()) {
     Path spill = GetSpillPath();
     auto it = Path::DirIterator::OpenDirectory(&spill);
-    while (it->hasNext()) {
-      rc = it->next().Remove();
+    while (it->HasNext()) {
+      rc = it->Next().Remove();
       if (rc.IsError() && rc2.IsOk()) {
         rc2 = rc;
       }

@@ -24,6 +24,7 @@ namespace mindspore::dataset {
 PullBasedIteratorConsumer::PullBasedIteratorConsumer() { tree_adapter_lite_ = std::make_unique<TreeAdapterLite>(); }

 Status PullBasedIteratorConsumer::Init(std::shared_ptr<DatasetNode> root) {
+  RETURN_UNEXPECTED_IF_NULL(root);
   return tree_adapter_lite_->BuildTree(std::move(root));
 }

@@ -20,6 +20,7 @@

 namespace mindspore::dataset {
 Status PythonIteratorConsumer::GetNextAsList(py::list *out) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   std::vector<TensorPtr> row;
   {
     py::gil_scoped_release gil_release;
@@ -32,6 +33,7 @@ Status PythonIteratorConsumer::GetNextAsList(py::list *out) {
 }

 Status PythonIteratorConsumer::GetNextAsDict(py::dict *out) {
+  RETURN_UNEXPECTED_IF_NULL(out);
   std::vector<std::pair<std::string, std::shared_ptr<Tensor>>> vec;
   Status s;
   {
@@ -64,6 +66,8 @@ Status PythonTreeGetters::GetRow(TensorRow *const r) {
   return TreeGetters::GetRow(r);
 }
 Status PythonDatasetSizeGetter::GetRow(const std::shared_ptr<TreeAdapter> &tree_adapter, TensorRow *r) {
+  RETURN_UNEXPECTED_IF_NULL(tree_adapter);
+  RETURN_UNEXPECTED_IF_NULL(r);
   py::gil_scoped_release gil_release;
   return DatasetSizeGetter::GetRow(tree_adapter, r);
 }

@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
 #include <algorithm>
 #include <memory>
 #include <string>
@@ -179,6 +178,8 @@ Status ToDevice::Stop() {
 }

 Status ToDevice::GetDataInfo(std::vector<DataType> *const types, std::vector<TensorShape> *const shapes) {
+  RETURN_UNEXPECTED_IF_NULL(types);
+  RETURN_UNEXPECTED_IF_NULL(shapes);
   // tree_.root() must be DeviceQueueOp
   std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
   CHECK_FAIL_RETURN_UNEXPECTED(root != nullptr, "Root is a nullptr.");
@@ -218,8 +219,13 @@ Status SaveToDisk::ValidateParams() {
     MS_LOG(ERROR) << err;
     RETURN_STATUS_SYNTAX_ERROR(err);
   }
-  auto parent_path = dir.ParentPath();
-  if (!parent_path.empty() && access(common::SafeCStr(parent_path), R_OK) == -1) {
+  std::string real_path;
+  if (Path::RealPath(dir.ParentPath(), real_path).IsError()) {
+    std::string err_msg = "CreateSaver failed, can not get real dataset path: " + dir.ParentPath();
+    MS_LOG(ERROR) << err_msg;
+    RETURN_STATUS_SYNTAX_ERROR(err_msg);
+  }
+  if (access(dir.ParentPath().c_str(), R_OK) == -1) {
     std::string err_msg = "CreateSaver failed, no access to specified dataset path: " + dataset_path_;
     MS_LOG(ERROR) << err_msg;
     RETURN_STATUS_SYNTAX_ERROR(err_msg);
@@ -250,15 +256,15 @@ Status SaveToDisk::Save() {
   auto mr_header = std::make_shared<mindrecord::ShardHeader>();
   auto mr_writer = std::make_unique<mindrecord::ShardWriter>();
   std::vector<std::string> blob_fields;
-  if (mindrecord::SUCCESS != mindrecord::ShardWriter::initialize(&mr_writer, file_names)) {
-    RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter.");
+  if (mindrecord::SUCCESS != mindrecord::ShardWriter::Initialize(&mr_writer, file_names)) {
+    RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter, please check above `ERROR` level message.");
   }

   std::unordered_map<std::string, int32_t> column_name_id_map;
   for (auto el : tree_adapter_->GetColumnNameMap()) {
     std::string column_name = el.first;
-    std::transform(column_name.begin(), column_name.end(), column_name.begin(),
-                   [](unsigned char c) { return ispunct(c) ? '_' : c; });
+    (void)std::transform(column_name.begin(), column_name.end(), column_name.begin(),
+                         [](unsigned char c) { return ispunct(c) ? '_' : c; });
     column_name_id_map[column_name] = el.second;
   }

@@ -281,17 +287,21 @@ Status SaveToDisk::Save() {
       RETURN_IF_NOT_OK(FetchMetaFromTensorRow(column_name_id_map, row, &mr_json, &index_fields));
       MS_LOG(INFO) << "Schema of saved mindrecord: " << mr_json.dump();
       if (mindrecord::SUCCESS !=
-          mindrecord::ShardHeader::initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) {
+          mindrecord::ShardHeader::Initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) {
         RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardHeader.");
       }
-      mr_writer->SetShardHeader(mr_header);
+      if (mindrecord::SUCCESS != mr_writer->SetShardHeader(mr_header)) {
+        RETURN_STATUS_UNEXPECTED("Error: failed to set header of ShardWriter.");
+      }
       first_loop = false;
     }
     // construct data
     if (!row.empty()) {  // write data
       RETURN_IF_NOT_OK(FetchDataFromTensorRow(row, column_name_id_map, &row_raw_data, &row_bin_data));
       std::shared_ptr<std::vector<uint8_t>> output_bin_data;
-      mr_writer->MergeBlobData(blob_fields, row_bin_data, &output_bin_data);
+      if (mindrecord::SUCCESS != mr_writer->MergeBlobData(blob_fields, row_bin_data, &output_bin_data)) {
+        RETURN_STATUS_UNEXPECTED("Error: failed to merge blob data of ShardWriter.");
+      }
       std::map<std::uint64_t, std::vector<nlohmann::json>> raw_data;
       raw_data.insert(
         std::pair<uint64_t, std::vector<nlohmann::json>>(mr_schema_id, std::vector<nlohmann::json>{row_raw_data}));
@@ -299,12 +309,16 @@ Status SaveToDisk::Save() {
       if (output_bin_data != nullptr) {
        bin_data.emplace_back(*output_bin_data);
      }
-      mr_writer->WriteRawData(raw_data, bin_data);
+      if (mindrecord::SUCCESS != mr_writer->WriteRawData(raw_data, bin_data)) {
+        RETURN_STATUS_UNEXPECTED("Error: failed to write raw data to ShardWriter.");
+      }
    }
  } while (!row.empty());

-  mr_writer->Commit();
-  if (mindrecord::SUCCESS != mindrecord::ShardIndexGenerator::finalize(file_names)) {
+  if (mindrecord::SUCCESS != mr_writer->Commit()) {
+    RETURN_STATUS_UNEXPECTED("Error: failed to commit ShardWriter.");
+  }
+  if (mindrecord::SUCCESS != mindrecord::ShardIndexGenerator::Finalize(file_names)) {
     RETURN_STATUS_UNEXPECTED("Error: failed to finalize ShardIndexGenerator.");
   }
   return Status::OK();
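The `Save()` hunks stop discarding return codes: every fallible `ShardWriter`/`ShardIndexGenerator` call is now wrapped in a `SUCCESS` comparison that turns a failure into an early error return. A compact stand-in for that pattern (the enum and function bodies here are hypothetical):

```cpp
#include <iostream>

// Hypothetical stand-ins for the mindrecord return codes and writer calls.
enum MSRStatus { SUCCESS, FAILED };

MSRStatus SetShardHeader() { return SUCCESS; }
MSRStatus WriteRawData() { return SUCCESS; }
MSRStatus Commit() { return FAILED; }  // simulate a failing commit

bool Save() {
  if (SUCCESS != SetShardHeader()) {
    std::cerr << "Error: failed to set header of ShardWriter.\n";
    return false;
  }
  if (SUCCESS != WriteRawData()) {
    std::cerr << "Error: failed to write raw data to ShardWriter.\n";
    return false;
  }
  if (SUCCESS != Commit()) {  // previously the result was simply ignored
    std::cerr << "Error: failed to commit ShardWriter.\n";
    return false;
  }
  return true;
}

int main() { std::cout << Save() << '\n'; }  // prints: 0
```
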
@@ -407,7 +421,7 @@ Status SaveToDisk::FetchMetaFromTensorRow(const std::unordered_map<std::string,
   return Status::OK();
 }

-static Status ValidateInputParams(nlohmann::json *row_raw_data,
+inline Status ValidateInputParams(nlohmann::json *row_raw_data,
                                   std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data,
                                   const std::unordered_map<std::string, int32_t> &column_name_id_map) {
   if (row_raw_data == nullptr) {
@@ -424,6 +438,8 @@ static Status ValidateInputParams(nlohmann::json *row_raw_data,

 Status SaveToDisk::FetchFloatData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
                                   std::unique_ptr<std::vector<uint8_t>> *data_ptr) {
+  RETURN_UNEXPECTED_IF_NULL(row_raw_data);
+  RETURN_UNEXPECTED_IF_NULL(data_ptr);
   auto column_type = tensor->type();
   Status s;
   if (column_type == DataType::DE_FLOAT32) {
@@ -442,6 +458,9 @@ Status SaveToDisk::FetchFloatData(std::shared_ptr<Tensor> tensor, std::string co

 Status SaveToDisk::FetchItemData(std::shared_ptr<Tensor> tensor, std::string column_name, nlohmann::json *row_raw_data,
                                  std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) {
+  RETURN_UNEXPECTED_IF_NULL(tensor);
+  RETURN_UNEXPECTED_IF_NULL(row_raw_data);
+  RETURN_UNEXPECTED_IF_NULL(row_bin_data);
   auto column_type = tensor->type();
   Status s;
   std::unique_ptr<std::vector<uint8_t>> data_ptr;
@@ -492,7 +511,6 @@ Status SaveToDisk::FetchItemData(std::shared_ptr<Tensor> tensor, std::string col
     RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {}));  // assume scalar string tensor
     std::string ss(sv);
     (*row_raw_data)[column_name] = std::move(ss);
-    return Status::OK();
   } else {
     RETURN_STATUS_UNEXPECTED("Got unexpected type when casting data.");
   }
@@ -506,6 +524,8 @@ Status SaveToDisk::FetchDataFromTensorRow(const TensorRow &row,
                                           const std::unordered_map<std::string, int32_t> &column_name_id_map,
                                           nlohmann::json *row_raw_data,
                                           std::map<std::string, std::unique_ptr<std::vector<uint8_t>>> *row_bin_data) {
+  RETURN_UNEXPECTED_IF_NULL(row_raw_data);
+  RETURN_UNEXPECTED_IF_NULL(row_bin_data);
   Status s;
   s = ValidateInputParams(row_raw_data, row_bin_data, column_name_id_map);
   if (s.IsError()) {
@@ -525,9 +545,11 @@ template <typename T, typename S>
 Status SaveToDisk::TransformTensor(const unsigned char *src, const TensorShape &shape, const int64_t num_of_elements,
                                    std::unique_ptr<T> *data, std::unique_ptr<std::vector<uint8_t>> *data_ptr,
                                    std::unique_ptr<S> *s, bool need_convert) {
-  if (nullptr == src) {
-    RETURN_STATUS_UNEXPECTED("Error: buffer of Tensor is NULL.");
-  }
+  RETURN_UNEXPECTED_IF_NULL(src);
+  RETURN_UNEXPECTED_IF_NULL(data);
+  RETURN_UNEXPECTED_IF_NULL(data_ptr);
+  RETURN_UNEXPECTED_IF_NULL(s);
+
   *data_ptr = std::make_unique<std::vector<uint8_t>>(num_of_elements * sizeof(T));
   if (need_convert) {
     auto tmp_ptr = std::make_unique<std::vector<uint8_t>>(num_of_elements * sizeof(S));
@@ -560,25 +582,32 @@ TreeGetters::TreeGetters() : dataset_size_(-1), init_flag_(false), first_row_obt
 }

 Status TreeGetters::Init(std::shared_ptr<DatasetNode> d) {
+  RETURN_UNEXPECTED_IF_NULL(d);
   root_ = std::move(d);
   return Status::OK();
 }

-Status TreeGetters::GetRow(TensorRow *row) { return tree_adapter_->GetNext(row); }
+Status TreeGetters::GetRow(TensorRow *row) {
+  RETURN_UNEXPECTED_IF_NULL(row);
+  return tree_adapter_->GetNext(row);
+}

 Status TreeGetters::GetOutputTypes(std::vector<DataType> *types) {
+  RETURN_UNEXPECTED_IF_NULL(types);
   RETURN_IF_NOT_OK(GetFirstRowShapeAndType());
   *types = first_row_type_;
   return Status::OK();
 }

 Status TreeGetters::GetOutputShapes(std::vector<TensorShape> *shapes) {
+  RETURN_UNEXPECTED_IF_NULL(shapes);
   RETURN_IF_NOT_OK(GetFirstRowShapeAndType());
   *shapes = first_row_shape_;
   return Status::OK();
 }

 Status TreeGetters::GetBatchSize(int64_t *batch_size) {
+  RETURN_UNEXPECTED_IF_NULL(batch_size);
   RETURN_IF_NOT_OK(InternalInit());
   std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
   RETURN_UNEXPECTED_IF_NULL(root);
@@ -588,6 +617,7 @@ Status TreeGetters::GetBatchSize(int64_t *batch_size) {
 }

 Status TreeGetters::GetRepeatCount(int64_t *repeat_count) {
+  RETURN_UNEXPECTED_IF_NULL(repeat_count);
   RETURN_IF_NOT_OK(InternalInit());
   std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
   RETURN_UNEXPECTED_IF_NULL(root);
@@ -596,6 +626,7 @@ Status TreeGetters::GetRepeatCount(int64_t *repeat_count) {
 }

 Status TreeGetters::GetNumClasses(int64_t *num_classes) {
+  RETURN_UNEXPECTED_IF_NULL(num_classes);
   RETURN_IF_NOT_OK(InternalInit());
   std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
   RETURN_UNEXPECTED_IF_NULL(root);
@@ -604,6 +635,7 @@ Status TreeGetters::GetNumClasses(int64_t *num_classes) {
 }

 Status TreeGetters::GetColumnNames(std::vector<std::string> *output) {
+  RETURN_UNEXPECTED_IF_NULL(output);
   RETURN_IF_NOT_OK(InternalInit());
   std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
   RETURN_UNEXPECTED_IF_NULL(root);
@@ -620,6 +652,7 @@ Status TreeGetters::GetColumnNames(std::vector<std::string> *output) {
 }

 Status TreeGetters::GetClassIndexing(std::vector<std::pair<std::string, std::vector<int32_t>>> *output_class_indexing) {
+  RETURN_UNEXPECTED_IF_NULL(output_class_indexing);
   RETURN_IF_NOT_OK(InternalInit());
   std::shared_ptr<DatasetOp> root = std::shared_ptr<DatasetOp>(tree_adapter_->GetRoot());
   RETURN_UNEXPECTED_IF_NULL(root);
@@ -671,6 +704,7 @@ Status DatasetSizeGetter::Init(std::shared_ptr<DatasetNode> d) {
   return Status::OK();
 }
 Status DatasetSizeGetter::DryRun(std::shared_ptr<DatasetNode> ir_node, int64_t *dataset_size) {
+  RETURN_UNEXPECTED_IF_NULL(dataset_size);
   std::shared_ptr<TreeAdapter> tree_adapter = std::make_shared<TreeAdapter>(TreeAdapter::UsageFlag::kDeGetter);
   tree_adapters_.push_back(tree_adapter);
   RETURN_IF_NOT_OK(tree_adapter->Compile(ir_node, 1));
@@ -685,6 +719,7 @@ Status DatasetSizeGetter::DryRun(std::shared_ptr<DatasetNode> ir_node, int64_t *
   return Status::OK();
 }
 Status DatasetSizeGetter::GetRow(const std::shared_ptr<TreeAdapter> &tree_adapter, TensorRow *row) {
+  RETURN_UNEXPECTED_IF_NULL(row);
   return tree_adapter->GetNext(row);
 }
 Status DatasetSizeGetter::Terminate() {

@@ -73,7 +73,7 @@ ColDescriptor::ColDescriptor(const std::string &col_name, DataType col_type, Ten
 ColDescriptor::ColDescriptor(const ColDescriptor &in_cd)
     : type_(in_cd.type_), rank_(in_cd.rank_), tensor_impl_(in_cd.tensor_impl_), col_name_(in_cd.col_name_) {
   // If it has a tensor shape, make a copy of it with our own unique_ptr.
-  tensor_shape_ = in_cd.hasShape() ? std::make_unique<TensorShape>(in_cd.shape()) : nullptr;
+  tensor_shape_ = in_cd.HasShape() ? std::make_unique<TensorShape>(in_cd.Shape()) : nullptr;
 }

 // Assignment overload
@@ -84,7 +84,7 @@ ColDescriptor &ColDescriptor::operator=(const ColDescriptor &in_cd) {
     tensor_impl_ = in_cd.tensor_impl_;
     col_name_ = in_cd.col_name_;
     // If it has a tensor shape, make a copy of it with our own unique_ptr.
-    tensor_shape_ = in_cd.hasShape() ? std::make_unique<TensorShape>(in_cd.shape()) : nullptr;
+    tensor_shape_ = in_cd.HasShape() ? std::make_unique<TensorShape>(in_cd.Shape()) : nullptr;
   }
   return *this;
 }
@@ -113,7 +113,7 @@ Status ColDescriptor::MaterializeTensorShape(int32_t num_elements, TensorShape *

   // If the shape is not given in this column, then we assume the shape will be: {numElements}
   if (tensor_shape_ == nullptr) {
-    if (this->rank() == 0 && num_elements == 1) {
+    if (this->Rank() == 0 && num_elements == 1) {
       *out_shape = TensorShape::CreateScalar();
       return Status::OK();
     }
@@ -171,7 +171,7 @@ Status ColDescriptor::MaterializeTensorShape(int32_t num_elements, TensorShape *
 }

 // getter function for the shape
-TensorShape ColDescriptor::shape() const {
+TensorShape ColDescriptor::Shape() const {
   if (tensor_shape_ != nullptr) {
     return *tensor_shape_;  // copy construct a shape to return
   } else {
@@ -255,7 +255,7 @@ Status DataSchema::ColumnOrderLoad(nlohmann::json column_tree, const std::vector
 }

 // Internal helper function for parsing shape info and building a vector for the shape construction.
-static Status buildShape(const nlohmann::json &shapeVal, std::vector<dsize_t> *outShape) {
+static Status BuildShape(const nlohmann::json &shapeVal, std::vector<dsize_t> *outShape) {
   if (outShape == nullptr) {
     RETURN_STATUS_UNEXPECTED("null output shape");
   }
@@ -272,7 +272,8 @@ static Status buildShape(const nlohmann::json &shapeVal, std::vector<dsize_t> *o
 Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::string &col_name) {
   int32_t rank_value = -1;
   TensorImpl t_impl_value = TensorImpl::kFlexible;
-  std::string name, type_str;
+  std::string name = "";
+  std::string type_str = "";
   std::vector<dsize_t> tmp_shape = {};
   bool shape_field_exists = false;
   // Iterate over this column's attributes.
@@ -289,7 +290,7 @@ Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::strin
       STR_TO_TENSORIMPL(it_child.value(), t_impl_value);
     } else if (it_child.key() == "shape") {
       shape_field_exists = true;
-      RETURN_IF_NOT_OK(buildShape(it_child.value(), &tmp_shape));
+      RETURN_IF_NOT_OK(BuildShape(it_child.value(), &tmp_shape));
     } else {
       std::string err_msg = "Unexpected column attribute " + it_child.key() + " for column " + col_name;
       RETURN_STATUS_UNEXPECTED(err_msg);
@@ -322,10 +323,10 @@ Status DataSchema::ColumnLoad(nlohmann::json column_child_tree, const std::strin
   // Create the column descriptor for this column from the data we pulled from the json file
   TensorShape col_shape = TensorShape(tmp_shape);
   if (shape_field_exists)
-    (void)this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value, &col_shape));
+    RETURN_IF_NOT_OK(this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value, &col_shape)));
   else
     // Create a column descriptor that doesn't have a shape
-    (void)this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value));
+    RETURN_IF_NOT_OK(this->AddColumn(ColDescriptor(name, DataType(type_str), t_impl_value, rank_value)));
   return Status::OK();
 }

@ -343,19 +344,30 @@ Status DataSchema::LoadSchemaFile(const std::string &schema_file_path,
|
|||
} catch (nlohmann::json::out_of_range &e) {
|
||||
num_rows_ = 0;
|
||||
} catch (nlohmann::json::exception &e) {
|
||||
in.close();
|
||||
RETURN_STATUS_UNEXPECTED("Unable to parse \"numRows\" from schema");
|
||||
}
|
||||
nlohmann::json column_tree = js.at("columns");
|
||||
if (column_tree.empty()) {
|
||||
in.close();
|
||||
RETURN_STATUS_UNEXPECTED("columns is null");
|
||||
}
|
||||
if (columns_to_load.empty()) {
|
||||
// Parse the json tree and load the schema's columns in whatever order that the json
|
||||
// layout decides
|
||||
RETURN_IF_NOT_OK(this->AnyOrderLoad(column_tree));
|
||||
Status rc = this->AnyOrderLoad(column_tree);
|
||||
if (rc.IsError()) {
|
||||
in.close();
|
||||
return rc;
|
||||
}
|
||||
} else {
|
||||
RETURN_IF_NOT_OK(this->ColumnOrderLoad(column_tree, columns_to_load));
|
||||
Status rc = this->ColumnOrderLoad(column_tree, columns_to_load);
|
||||
if (rc.IsError()) {
|
||||
in.close();
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
in.close();
|
||||
} catch (const std::exception &err) {
|
||||
// Catch any exception and convert to Status return code
|
||||
RETURN_STATUS_UNEXPECTED("Schema file failed to load with JSON tools. File is: " + schema_file_path);
|
||||
|
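
A minimal schema file the loader above would parse (hedged sketch: the top-level "numRows"/"columns" keys come from the parsing code in this hunk, while the per-column attribute names "type", "rank", and "shape" are inferred from ColumnLoad and may not be exhaustive):

#include <iostream>
#include <nlohmann/json.hpp>

int main() {
  // Illustrative schema string only; real MindData schemas may differ.
  auto js = nlohmann::json::parse(R"({
    "numRows": 2,
    "columns": {
      "image": {"type": "uint8", "rank": 1},
      "label": {"type": "int32", "rank": 1, "shape": [1]}
    }
  })");
  nlohmann::json column_tree = js.at("columns");
  if (column_tree.empty()) {
    std::cerr << "columns is null\n";
    return 1;
  }
  std::cout << column_tree.size() << " columns loaded\n";
  return 0;
}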
@@ -392,7 +404,7 @@ Status DataSchema::LoadSchemaString(const std::string &schema_json_string,
 DataSchema::~DataSchema() = default;

 // Getter for the ColDescriptor by index
-const ColDescriptor &DataSchema::column(int32_t idx) const {
+const ColDescriptor &DataSchema::Column(int32_t idx) const {
   MS_ASSERT(idx < static_cast<int>(col_descs_.size()));
   return col_descs_[idx];
 }

@@ -409,9 +421,9 @@ void DataSchema::Print(std::ostream &out) const {
 Status DataSchema::AddColumn(const ColDescriptor &cd) {
   // Sanity check there's not a duplicate name before adding the column
   for (auto i = 0; i < col_descs_.size(); ++i) {
-    if (col_descs_[i].name() == cd.name()) {
+    if (col_descs_[i].Name() == cd.Name()) {
       std::ostringstream ss;
-      ss << "column name '" << cd.name() << "' already exists in schema.";
+      ss << "column name '" << cd.Name() << "' already exists in schema.";
       std::string err_msg = ss.str();
       RETURN_STATUS_UNEXPECTED(err_msg);
     }

@@ -437,11 +449,11 @@ Status DataSchema::GetColumnNameMap(std::unordered_map<std::string, int32_t> *ou
   }

   for (size_t i = 0; i < col_descs_.size(); ++i) {
-    if (col_descs_[i].name().empty()) {
+    if (col_descs_[i].Name().empty()) {
       return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__,
                     "Constructing column name map from schema, but found empty column name.");
     }
-    (*out_column_name_map)[col_descs_[i].name()] = i;
+    (*out_column_name_map)[col_descs_[i].Name()] = i;
   }

   return Status::OK();
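
The switch from "(void)this->AddColumn(...)" to RETURN_IF_NOT_OK in ColumnLoad matters because AddColumn's duplicate-name error used to be computed and then discarded. A minimal stand-in (not MindData code) showing the difference:

#include <iostream>
#include <set>
#include <string>

// Stand-in for AddColumn: returns false when the column name already exists.
bool AddColumn(std::set<std::string> *schema, const std::string &name) {
  return schema->insert(name).second;
}

int main() {
  std::set<std::string> schema;
  (void)AddColumn(&schema, "label");   // old style: result silently ignored
  if (!AddColumn(&schema, "label")) {  // new style: caller checks and fails
    std::cerr << "column name 'label' already exists in schema.\n";
    return 1;
  }
  return 0;
}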
@@ -81,27 +81,27 @@ class ColDescriptor {

   /// \brief getter function
   /// \return The column's DataType
-  DataType type() const { return type_; }
+  DataType Type() const { return type_; }

   /// \brief getter function
   /// \return The column's rank
-  int32_t rank() const { return rank_; }
+  int32_t Rank() const { return rank_; }

   /// \brief getter function
   /// \return The column's name
-  std::string name() const { return col_name_; }
+  std::string Name() const { return col_name_; }

   /// \brief getter function
   /// \return The column's shape
-  TensorShape shape() const;
+  TensorShape Shape() const;

   /// \brief getter function
   /// \return TF if the column has an assigned fixed shape.
-  bool hasShape() const { return tensor_shape_ != nullptr; }
+  bool HasShape() const { return tensor_shape_ != nullptr; }

   /// \brief getter function
   /// \return The column's tensor implementation type
-  TensorImpl tensorImpl() const { return tensor_impl_; }
+  TensorImpl GetTensorImpl() const { return tensor_impl_; }

  private:
   DataType type_;  // The columns type

@@ -153,7 +153,7 @@ class DataSchema {

   /// \brief getter
   /// \return The reference to a ColDescriptor to get (const version)
-  const ColDescriptor &column(int32_t idx) const;
+  const ColDescriptor &Column(int32_t idx) const;

   /// \brief getter
   /// \return The number of columns in the schema

@@ -163,7 +163,7 @@ class DataSchema {

   /// \brief getter
   /// \return The number of rows read from schema
-  int64_t num_rows() const { return num_rows_; }
+  int64_t NumRows() const { return num_rows_; }

   static const char DEFAULT_DATA_SCHEMA_FILENAME[];
@@ -14,6 +14,7 @@
  * limitations under the License.
  */
 #include "minddata/dataset/engine/dataset_iterator.h"
 #include <algorithm>
 #include <unordered_map>
 #include <utility>
+#include "minddata/dataset/core/data_type.h"

@@ -49,7 +49,7 @@ class DatasetIterator {
   // @return The string to column id mapping.
   std::unordered_map<std::string, int32_t> GetColumnNameMap() const;

-  bool eof_handled() const { return eof_handled_; }
+  bool EofHandled() const { return eof_handled_; }

   // Fetches one row of data from the iterator.
   // the base class version simply performs error handling and returns empty row. Actual

@@ -108,11 +108,11 @@ class ChildIterator {
   std::unordered_map<std::string, int32_t> GetColumnNameMap() const;

   // Return T/F if end of epoch
-  bool end_of_epoch() { return end_epoch_; }
+  bool EndOfEpoch() { return end_epoch_; }

   // Getter
   // @return T/F if this iterator is completely done after getting an eof
-  bool eof_handled() const { return eof_handled_; }
+  bool EofHandled() const { return eof_handled_; }

  private:
   DatasetOp *current_op_;  // The parent operator. We consume from it's children.
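
The renamed EofHandled() accessor drives the same two-level drain loop in every operator below. A self-contained mock of that protocol (illustrative only, not MindData code; an empty row marks end-of-epoch, and the final fetch consumes EOF):

#include <deque>
#include <iostream>
#include <vector>

using Row = std::vector<int>;

class MockChildIterator {
 public:
  explicit MockChildIterator(std::deque<Row> rows) : rows_(std::move(rows)) {}
  bool EofHandled() const { return eof_handled_; }
  void FetchNextTensorRow(Row *row) {
    if (rows_.empty()) {  // nothing left: this fetch consumes the EOF
      row->clear();
      eof_handled_ = true;
      return;
    }
    *row = rows_.front();
    rows_.pop_front();
  }

 private:
  std::deque<Row> rows_;
  bool eof_handled_ = false;
};

int main() {
  // Two epochs: {1}, {2}, EOE, {3}, EOE, then EOF.
  MockChildIterator it({{1}, {2}, {}, {3}, {}});
  Row row;
  it.FetchNextTensorRow(&row);
  while (!it.EofHandled()) {      // outer loop: epochs, ends at EOF
    while (!row.empty()) {        // inner loop: rows within one epoch
      std::cout << "row " << row[0] << "\n";
      it.FetchNextTensorRow(&row);
    }
    it.FetchNextTensorRow(&row);  // step past the end-of-epoch marker
  }
  return 0;
}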
@@ -113,6 +113,7 @@ Status BarrierOp::blockCond() {

 // fetches next Barrier row
 Status BarrierOp::getNextTensorRow(TensorRow *new_row) {
+  RETURN_UNEXPECTED_IF_NULL(new_row);
   // iterate over all iterators and generate a row
   RETURN_IF_NOT_OK((child_iterator_)->FetchNextTensorRow(new_row));
   // add each new row to iterator, check if row is empty, if row from iterator is empty return empty row

@@ -122,7 +123,7 @@ Status BarrierOp::getNextTensorRow(TensorRow *new_row) {
     MS_LOG(INFO) << "Barrier operator child iterator produced empty row.";
     clean_up_ = true;
     // If we picked up an eof here, then we are completely done.
-    if ((child_iterator_)->eof_handled()) {
+    if ((child_iterator_)->EofHandled()) {
       MS_LOG(INFO) << "Barrier operator iterator got EOF.";
       eof_ = true;
     }
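
The null guards added throughout this commit use RETURN_UNEXPECTED_IF_NULL. A minimal self-contained equivalent showing just its control flow (the real macro lives in MindData's status helpers and also records file/line; this is an assumption-labeled sketch):

#include <string>

struct Status {
  bool ok;
  std::string msg;
  static Status OK() { return {true, ""}; }
  static Status Unexpected(std::string m) { return {false, std::move(m)}; }
};

// Sketch of the guard: early-return an error Status instead of dereferencing null.
#define RETURN_UNEXPECTED_IF_NULL(ptr)                             \
  do {                                                             \
    if ((ptr) == nullptr) {                                        \
      return Status::Unexpected("The pointer[" #ptr "] is null."); \
    }                                                              \
  } while (false)

Status GetNextRow(int *row) {
  RETURN_UNEXPECTED_IF_NULL(row);
  *row = 42;
  return Status::OK();
}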
@@ -36,6 +36,7 @@ BatchOp::Builder::Builder(int32_t batch_size) : builder_drop_(false), builder_pa
 }

 Status BatchOp::Builder::Build(std::shared_ptr<BatchOp> *ptr) {
+  RETURN_UNEXPECTED_IF_NULL(ptr);
 #ifdef ENABLE_PYTHON
   *ptr = std::make_shared<BatchOp>(builder_batch_size_, builder_drop_, builder_pad_, builder_op_connector_size_,
                                    builder_num_workers_, builder_in_names_, builder_out_names_,

@@ -106,7 +107,7 @@ Status BatchOp::operator()() {
   RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
   int32_t cur_batch_size = 0;
   RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(0, 0, 0)));
-  while (child_iterator_->eof_handled() == false) {
+  while (child_iterator_->EofHandled() == false) {
     while (new_row.empty() == false) {
       table->emplace_back(new_row);
       // if # of rows is enough to make 1 batch, send it to worker_queue

@@ -142,7 +143,7 @@ Status BatchOp::operator()() {
                      << "reduce memory usage.";
     }
 #endif
-  }  // end of eof_handled() == false
+  }  // end of EofHandled() == false
   RETURN_IF_NOT_OK(
     worker_queues_[cnt++ % num_workers_]->EmplaceBack(std::make_pair(nullptr, CBatchInfo(batchCtrl::kEOF))));
   // EOF received, send quit signal to all workers

@@ -168,6 +169,8 @@ void BatchOp::Print(std::ostream &out, bool show_all) const {
 }

 Status BatchOp::BatchRows(const std::unique_ptr<TensorQTable> *src, TensorRow *dest, dsize_t batch_size) {
+  RETURN_UNEXPECTED_IF_NULL(src);
+  RETURN_UNEXPECTED_IF_NULL(dest);
   if ((*src)->size() != batch_size) {
     RETURN_STATUS_UNEXPECTED("[Internal ERROR] Source table size does not match the batch_size.");
   }

@@ -274,6 +277,8 @@ Status BatchOp::EoeReceived(int32_t) {

 #ifdef ENABLE_PYTHON
 Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> *table_pair) {
+  RETURN_UNEXPECTED_IF_NULL(table_pair);
+  RETURN_UNEXPECTED_IF_NULL(table_pair->first);
   std::unique_ptr<TensorQTable> in_q_table = std::move(table_pair->first);
   size_t num_rows = in_q_table->size();
   auto out_q_table = std::make_unique<TensorQTable>(num_rows, TensorRow(column_name_id_map_.size(), nullptr));

@@ -316,6 +321,7 @@ Status BatchOp::MapColumns(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo>
 #endif

 Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) {
+  RETURN_UNEXPECTED_IF_NULL(batch_size);
 #ifdef ENABLE_PYTHON
   if (batch_size_func_) {
     RETURN_IF_NOT_OK(InvokeBatchSizeFunc(batch_size, info));

@@ -330,6 +336,7 @@ Status BatchOp::GetBatchSize(int32_t *batch_size, CBatchInfo info) {

 #ifdef ENABLE_PYTHON
 Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) {
+  RETURN_UNEXPECTED_IF_NULL(batch_size);
   {
     // Acquire Python GIL
     py::gil_scoped_acquire gil_acquire;

@@ -355,6 +362,8 @@ Status BatchOp::InvokeBatchSizeFunc(int32_t *batch_size, CBatchInfo info) {
 }

 Status BatchOp::InvokeBatchMapFunc(TensorTable *input, TensorTable *output, CBatchInfo info) {
+  RETURN_UNEXPECTED_IF_NULL(input);
+  RETURN_UNEXPECTED_IF_NULL(output);
   {
     // Acquire Python GIL
     py::gil_scoped_acquire gil_acquire;

@@ -471,6 +480,9 @@ Status BatchOp::UnpackPadInfo(const PadInfo &pad_info,
                               const std::unordered_map<std::string, int32_t> &column_name_id_map,
                               std::set<int32_t> *pad_cols, std::vector<std::shared_ptr<Tensor>> *pad_vals,
                               std::vector<std::vector<dsize_t>> *pad_shapes) {
+  RETURN_UNEXPECTED_IF_NULL(pad_cols);
+  RETURN_UNEXPECTED_IF_NULL(pad_vals);
+  RETURN_UNEXPECTED_IF_NULL(pad_shapes);
   if (pad_info.empty()) {  // if pad_info empty, pad every columns automatically
     for (size_t col_id = 0; col_id < column_name_id_map.size(); col_id++) {
       pad_cols->insert(col_id);

@@ -561,6 +573,7 @@ int64_t BatchOp::GetTreeBatchSize() {
 }

 Status BatchOp::GetNextRowPullMode(TensorRow *const row) {
+  RETURN_UNEXPECTED_IF_NULL(row);
   std::unique_ptr<TensorQTable> table = std::make_unique<TensorQTable>();
   child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0);
   int32_t cur_batch_size = 0;
@@ -60,7 +60,7 @@ Status BucketBatchByLengthOp::operator()() {
   TensorRow current_row;
   child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0);
   RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&current_row));
-  while (!child_iterator_->eof_handled()) {
+  while (!child_iterator_->EofHandled()) {
     while (!current_row.empty()) {
       int32_t element_length;
       RETURN_IF_NOT_OK(ObtainElementLength(&element_length, current_row));

@@ -99,6 +99,7 @@ Status BucketBatchByLengthOp::operator()() {
 }

 Status BucketBatchByLengthOp::ObtainElementLength(int32_t *out_element_length, TensorRow element) {
+  RETURN_UNEXPECTED_IF_NULL(out_element_length);
   // call pyfunc here if given pyfunc, otherwise return 0th dimension of shape of
   // the single column specified in length_dependent_columns_
   if (element_length_function_) {
@@ -52,7 +52,7 @@ Status BuildSentencePieceVocabOp::operator()() {
   RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));

   bool eoe_warning = false;  // give out warning if receive more than 1 eoe
-  while (child_iterator_->eof_handled() == false) {
+  while (child_iterator_->EofHandled() == false) {
     while (new_row.empty() == false) {
       RETURN_IF_NOT_OK(sentence_queue_->EmplaceBack(new_row));
       RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));

@@ -107,7 +107,7 @@ Status BuildVocabOp::operator()() {
     }
   }
   bool eoe_warning = false;  // give out warning if receive more than 1 eoe
-  while (child_iterator_->eof_handled() == false) {
+  while (child_iterator_->EofHandled() == false) {
     while (new_row.empty() == false) {
       RETURN_IF_NOT_OK(distributor_queue_->EmplaceBack(new_row));
       RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));

@@ -593,7 +593,7 @@ Status DeviceQueueOp::SendDataToCPU() {
   MS_LOG(INFO) << "Device queue, sending data to CPU.";
   int64_t total_batch = 0;

-  while (!(child_iterator_->eof_handled())) {
+  while (!(child_iterator_->EofHandled())) {
     TensorRow curr_row;
     RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&curr_row));
@@ -62,7 +62,7 @@ Status FilterOp::operator()() {
   TensorRow new_row;
   RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
   int64_t cnt = 0;
-  while (child_iterator_->eof_handled() == false) {
+  while (child_iterator_->EofHandled() == false) {
     while (new_row.empty() == false) {
       RETURN_IF_NOT_OK(worker_queues_[cnt % num_workers_]->EmplaceBack(new_row));
       cnt++;

@@ -124,7 +124,7 @@ Status ShuffleOp::operator()() {
     RETURN_IF_NOT_OK(InitShuffleBuffer());

     // This is our main loop exit condition, when the iterator has no more data completely.
-    if (child_iterator_->eof_handled()) {
+    if (child_iterator_->EofHandled()) {
       RETURN_IF_NOT_OK(out_connector_->SendEOF());
       break;
     }

@@ -214,7 +214,7 @@ Status ShuffleOp::InitShuffleBuffer() {
   TensorRow new_row;
   RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));

-  if (child_iterator_->eof_handled()) {
+  if (child_iterator_->EofHandled()) {
     MS_LOG(DEBUG) << "Shuffle operator init picked up EOF. No more epochs.";
     RETURN_IF_NOT_OK(out_connector_->SendEOF());
     return Status::OK();
@@ -43,7 +43,7 @@ AlbumOp::AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, boo
       curr_row_(0) {
   // Set the column name map (base class field)
   for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
-    column_name_id_map_[data_schema_->column(i).name()] = i;
+    column_name_id_map_[data_schema_->Column(i).Name()] = i;
   }
   io_block_queues_.Init(num_workers_, queue_size);
 }

@@ -70,8 +70,8 @@ Status AlbumOp::PrescanEntry() {
   }
   MS_LOG(INFO) << "Album folder Path found: " << folder_path_ << ".";

-  while (dirItr->hasNext()) {
-    Path file = dirItr->next();
+  while (dirItr->HasNext()) {
+    Path file = dirItr->Next();
     if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) {
       (void)image_rows_.push_back(file.toString().substr(dirname_offset_));
     } else {

@@ -192,7 +192,7 @@ Status AlbumOp::LoadStringTensor(const nlohmann::json &json_obj, int32_t col_num
 Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
   TensorPtr label;
   // consider templating this function to handle all ints
-  if (data_schema_->column(col_num).type() == DataType::DE_INT64) {
+  if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) {
     std::vector<int64_t> data;

     // Iterate over the integer list and add those values to the output shape tensor

@@ -201,7 +201,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n
     (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); });

     RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label));
-  } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) {
+  } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) {
     std::vector<int32_t> data;

     // Iterate over the integer list and add those values to the output shape tensor

@@ -212,7 +212,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n
     RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &label));
   } else {
     RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither int32 nor int64, it is " +
-                             data_schema_->column(col_num).type().ToString());
+                             data_schema_->Column(col_num).Type().ToString());
   }
   row->push_back(std::move(label));
   return Status::OK();

@@ -221,7 +221,7 @@ Status AlbumOp::LoadIntArrayTensor(const nlohmann::json &json_obj, int32_t col_n
 Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
   TensorPtr float_array;
   // consider templating this function to handle all ints
-  if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) {
+  if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) {
     std::vector<double> data;

     // Iterate over the integer list and add those values to the output shape tensor

@@ -230,7 +230,7 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col
     (void)std::transform(items.begin(), items.end(), std::back_inserter(data), [](it_type j) { return j.value(); });

     RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array));
-  } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) {
+  } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) {
     std::vector<float> data;

     // Iterate over the integer list and add those values to the output shape tensor

@@ -241,14 +241,15 @@ Status AlbumOp::LoadFloatArrayTensor(const nlohmann::json &json_obj, int32_t col
     RETURN_IF_NOT_OK(Tensor::CreateFromVector(data, &float_array));
   } else {
     RETURN_STATUS_UNEXPECTED("Invalid data, column type in data_schema is neither float32 nor float64, it is " +
-                             data_schema_->column(col_num).type().ToString());
+                             data_schema_->Column(col_num).Type().ToString());
   }
   row->push_back(std::move(float_array));
   return Status::OK();
 }

 Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorRow *row) {
-  if (data_schema_->column(col_num).type() == DataType::DE_STRING) {
+  RETURN_UNEXPECTED_IF_NULL(row);
+  if (data_schema_->Column(col_num).Type() == DataType::DE_STRING) {
     TensorPtr id;
     RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>(file, &id));
     row->push_back(std::move(id));

@@ -266,7 +267,7 @@ Status AlbumOp::LoadIDTensor(const std::string &file, int32_t col_num, TensorRow
 Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorRow *row) {
   // hack to get the file name without extension, the 1 is to get rid of the backslash character
   TensorPtr empty_tensor;
-  RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->column(col_num).type(), &empty_tensor));
+  RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), data_schema_->Column(col_num).Type(), &empty_tensor));
   row->push_back(std::move(empty_tensor));
   return Status::OK();
 }

@@ -277,11 +278,11 @@ Status AlbumOp::LoadEmptyTensor(int32_t col_num, TensorRow *row) {
 // only be float32, seems like a weird limitation to impose
 Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
   TensorPtr float_tensor;
-  if (data_schema_->column(col_num).type() == DataType::DE_FLOAT64) {
+  if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT64) {
     double data = json_obj;
     MS_LOG(INFO) << "double found: " << json_obj << ".";
     RETURN_IF_NOT_OK(Tensor::CreateScalar<double>(data, &float_tensor));
-  } else if (data_schema_->column(col_num).type() == DataType::DE_FLOAT32) {
+  } else if (data_schema_->Column(col_num).Type() == DataType::DE_FLOAT32) {
     float data1 = json_obj;
     RETURN_IF_NOT_OK(Tensor::CreateScalar<float>(data1, &float_tensor));
     MS_LOG(INFO) << "float found: " << json_obj << ".";

@@ -293,11 +294,11 @@ Status AlbumOp::LoadFloatTensor(const nlohmann::json &json_obj, int32_t col_num,
 // Loads a tensor with int value, we have to cast the value to type specified in the schema.
 Status AlbumOp::LoadIntTensor(const nlohmann::json &json_obj, int32_t col_num, TensorRow *row) {
   TensorPtr int_tensor;
-  if (data_schema_->column(col_num).type() == DataType::DE_INT64) {
+  if (data_schema_->Column(col_num).Type() == DataType::DE_INT64) {
     int64_t data = json_obj;
     MS_LOG(INFO) << "int64 found: " << json_obj << ".";
     RETURN_IF_NOT_OK(Tensor::CreateScalar<int64_t>(data, &int_tensor));
-  } else if (data_schema_->column(col_num).type() == DataType::DE_INT32) {
+  } else if (data_schema_->Column(col_num).Type() == DataType::DE_INT32) {
     int32_t data = json_obj;
     RETURN_IF_NOT_OK(Tensor::CreateScalar<int32_t>(data, &int_tensor));
     MS_LOG(INFO) << "int32 found: " << json_obj << ".";

@@ -349,35 +350,35 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, TensorRow *row) {
 Status AlbumOp::loadColumnData(const std::string &file, int32_t index, nlohmann::json js, TensorRow *row) {
   int32_t i = index;
   // special case to handle
-  if (data_schema_->column(i).name() == "id") {
+  if (data_schema_->Column(i).Name() == "id") {
     // id is internal, special case to load from file
     return LoadIDTensor(file, i, row);
   }
   // find if key does not exist, insert placeholder nullptr if not found
-  if (js.find(data_schema_->column(i).name()) == js.end()) {
+  if (js.find(data_schema_->Column(i).Name()) == js.end()) {
     // iterator not found, push nullptr as placeholder
-    MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << ".";
+    MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->Column(i).Name() << ".";
     return LoadEmptyTensor(i, row);
   }
-  nlohmann::json column_value = js.at(data_schema_->column(i).name());
-  MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << ".";
+  nlohmann::json column_value = js.at(data_schema_->Column(i).Name());
+  MS_LOG(INFO) << "This column is: " << data_schema_->Column(i).Name() << ".";
   bool is_array = column_value.is_array();
   // load single string
-  if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) {
+  if (column_value.is_string() && data_schema_->Column(i).Type() == DataType::DE_STRING) {
     return LoadStringTensor(column_value, i, row);
   }
   // load string array
-  if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) {
+  if (is_array && data_schema_->Column(i).Type() == DataType::DE_STRING) {
     return LoadStringArrayTensor(column_value, i, row);
   }
   // load image file
-  if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) {
+  if (column_value.is_string() && data_schema_->Column(i).Type() != DataType::DE_STRING) {
     std::string image_file_path = column_value;
     return LoadImageTensor(image_file_path, i, row);
   }
   // load float value
-  bool judge_float = (data_schema_->column(i).type() == DataType::DE_FLOAT32) ||
-                     (data_schema_->column(i).type() == DataType::DE_FLOAT64);
+  bool judge_float = (data_schema_->Column(i).Type() == DataType::DE_FLOAT32) ||
+                     (data_schema_->Column(i).Type() == DataType::DE_FLOAT64);
   if (!is_array && judge_float) {
     return LoadFloatTensor(column_value, i, row);
   }

@@ -387,15 +388,15 @@ Status AlbumOp::loadColumnData(const std::string &file, int32_t index, nlohmann:
   }
   // int value
   if (!is_array &&
-      (data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) {
+      (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) {
     return LoadIntTensor(column_value, i, row);
   }
   // int array
   if (is_array &&
-      (data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) {
+      (data_schema_->Column(i).Type() == DataType::DE_INT64 || data_schema_->Column(i).Type() == DataType::DE_INT32)) {
     return LoadIntArrayTensor(column_value, i, row);
   } else {
-    MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported.";
+    MS_LOG(WARNING) << "Value type for column: " << data_schema_->Column(i).Name() << " is not supported.";
     return Status::OK();
   }
 }
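
The loadColumnData dispatch above is easier to read as a table; summarizing the branches visible in this commit (the float-array branch sits in an elided hunk between the float and int cases):

// column "id"                      -> LoadIDTensor (derived from the file name)
// key missing in json              -> LoadEmptyTensor (placeholder)
// json string + schema DE_STRING   -> LoadStringTensor
// json array  + schema DE_STRING   -> LoadStringArrayTensor
// json string + non-string schema  -> LoadImageTensor (value is an image path)
// json scalar + float32/float64    -> LoadFloatTensor
// json scalar + int32/int64        -> LoadIntTensor
// json array  + int32/int64        -> LoadIntArrayTensor
// anything else                    -> warning, column skipped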
|
@ -438,7 +439,7 @@ Status AlbumOp::ComputeColMap() {
|
|||
// Set the column name map (base class field)
|
||||
if (column_name_id_map_.empty()) {
|
||||
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
|
||||
column_name_id_map_[data_schema_->column(i).name()] = i;
|
||||
column_name_id_map_[data_schema_->Column(i).Name()] = i;
|
||||
}
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Column name map is already set!";
|
||||
|
|
|
@ -258,7 +258,7 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, TensorRow *row) {
|
|||
}
|
||||
|
||||
RETURN_IF_NOT_OK(
|
||||
Tensor::CreateEmpty(TensorShape({1, (uint32_t)image_label.second.size()}), data_schema_->column(1).type(), &label));
|
||||
Tensor::CreateEmpty(TensorShape({1, (uint32_t)image_label.second.size()}), data_schema_->Column(1).Type(), &label));
|
||||
RETURN_IF_NOT_OK(label->Zero());
|
||||
for (uint32_t index = 0; index < image_label.second.size(); index++) {
|
||||
if (image_label.second[index] == 1) {
|
||||
|
@ -294,7 +294,7 @@ Status CelebAOp::ComputeColMap() {
|
|||
// Set the column name map (base class field)
|
||||
if (column_name_id_map_.empty()) {
|
||||
for (int32_t index = 0; index < data_schema_->NumColumns(); index++) {
|
||||
column_name_id_map_[data_schema_->column(index).name()] = index;
|
||||
column_name_id_map_[data_schema_->Column(index).Name()] = index;
|
||||
}
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Column name map is already set!";
|
||||
|
|
|
@ -205,8 +205,8 @@ Status CifarOp::GetCifarFiles() {
|
|||
Path dir_path(folder_path_);
|
||||
auto dirIt = Path::DirIterator::OpenDirectory(&dir_path);
|
||||
if (dirIt) {
|
||||
while (dirIt->hasNext()) {
|
||||
Path file = dirIt->next();
|
||||
while (dirIt->HasNext()) {
|
||||
Path file = dirIt->Next();
|
||||
if (file.Extension() == kExtension) {
|
||||
cifar_files_.push_back(file.toString());
|
||||
}
|
||||
|
@ -236,7 +236,7 @@ Status CifarOp::ParseCifarData() {
|
|||
|
||||
std::shared_ptr<Tensor> image_tensor;
|
||||
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}),
|
||||
data_schema_->column(0).type(), &image_tensor));
|
||||
data_schema_->Column(0).Type(), &image_tensor));
|
||||
auto itr = image_tensor->begin<uint8_t>();
|
||||
uint32_t total_pix = kCifarImageHeight * kCifarImageWidth;
|
||||
for (uint32_t pix = 0; pix < total_pix; ++pix) {
|
||||
|
@ -369,7 +369,7 @@ Status CifarOp::ComputeColMap() {
|
|||
// set the column name map (base class field)
|
||||
if (column_name_id_map_.empty()) {
|
||||
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
|
||||
column_name_id_map_[data_schema_->column(i).name()] = i;
|
||||
column_name_id_map_[data_schema_->Column(i).Name()] = i;
|
||||
}
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Column name map is already set!";
|
||||
|
|
|
@ -159,7 +159,7 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
|
|||
}
|
||||
|
||||
std::string kImageFile = image_folder_path_ + std::string("/") + image_id;
|
||||
RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image));
|
||||
RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
|
||||
|
||||
auto bboxRow = itr->second;
|
||||
std::vector<float> bbox_row;
|
||||
|
@ -590,7 +590,7 @@ Status CocoOp::ComputeColMap() {
|
|||
// Set the column name map (base class field)
|
||||
if (column_name_id_map_.empty()) {
|
||||
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
|
||||
column_name_id_map_[data_schema_->column(i).name()] = i;
|
||||
column_name_id_map_[data_schema_->Column(i).Name()] = i;
|
||||
}
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Column name map is already set!";
|
||||
|
|
|
@ -156,8 +156,8 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) {
|
|||
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + folder_name);
|
||||
}
|
||||
std::set<std::string> imgs; // use this for ordering
|
||||
while (dirItr->hasNext()) {
|
||||
Path file = dirItr->next();
|
||||
while (dirItr->HasNext()) {
|
||||
Path file = dirItr->Next();
|
||||
if (extensions_.empty() || extensions_.find(file.Extension()) != extensions_.end()) {
|
||||
(void)imgs.insert(file.toString().substr(dirname_offset_));
|
||||
} else {
|
||||
|
@ -182,8 +182,8 @@ Status ImageFolderOp::PrescanWorkerEntry(int32_t worker_id) {
|
|||
Status ImageFolderOp::RecursiveWalkFolder(Path *dir) {
|
||||
std::shared_ptr<Path::DirIterator> dir_itr = Path::DirIterator::OpenDirectory(dir);
|
||||
RETURN_UNEXPECTED_IF_NULL(dir_itr);
|
||||
while (dir_itr->hasNext()) {
|
||||
Path subdir = dir_itr->next();
|
||||
while (dir_itr->HasNext()) {
|
||||
Path subdir = dir_itr->Next();
|
||||
if (subdir.IsDirectory()) {
|
||||
if (class_index_.empty() ||
|
||||
class_index_.find(subdir.toString().substr(dirname_offset_ + 1)) != class_index_.end()) {
|
||||
|
@ -256,8 +256,8 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se
|
|||
std::queue<std::string> folder_paths;
|
||||
std::shared_ptr<Path::DirIterator> dir_itr = Path::DirIterator::OpenDirectory(&dir);
|
||||
std::unordered_set<std::string> folder_names;
|
||||
while (dir_itr->hasNext()) {
|
||||
Path subdir = dir_itr->next();
|
||||
while (dir_itr->HasNext()) {
|
||||
Path subdir = dir_itr->Next();
|
||||
if (subdir.IsDirectory()) {
|
||||
folder_paths.push(subdir.toString());
|
||||
if (!class_index.empty()) folder_names.insert(subdir.Basename());
|
||||
|
@ -283,7 +283,7 @@ Status ImageFolderOp::CountRowsAndClasses(const std::string &path, const std::se
|
|||
if (subdir.Exists() == false || dir_itr == nullptr) {
|
||||
RETURN_STATUS_UNEXPECTED("Invalid file, failed to open folder: " + subdir.toString());
|
||||
}
|
||||
while (dir_itr->hasNext()) {
|
||||
while (dir_itr->HasNext()) {
|
||||
if (exts.empty() || exts.find(subdir.Extension()) != exts.end()) {
|
||||
++row_cnt;
|
||||
}
|
||||
|
@ -298,7 +298,7 @@ Status ImageFolderOp::ComputeColMap() {
|
|||
// Set the column name map (base class field)
|
||||
if (column_name_id_map_.empty()) {
|
||||
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
|
||||
column_name_id_map_[data_schema_->column(i).name()] = i;
|
||||
column_name_id_map_[data_schema_->Column(i).Name()] = i;
|
||||
}
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Column name map is already set!";
|
||||
|
|
|
@ -339,7 +339,7 @@ Status ManifestOp::ComputeColMap() {
|
|||
// Set the column name map (base class field)
|
||||
if (column_name_id_map_.empty()) {
|
||||
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
|
||||
column_name_id_map_[data_schema_->column(i).name()] = i;
|
||||
column_name_id_map_[data_schema_->Column(i).Name()] = i;
|
||||
}
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Column name map is already set!";
|
||||
|
|
|
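
The hasNext/next to HasNext/Next rename appears in every reader that walks a directory. Typical usage after this commit, using only APIs shown in the hunks above (a sketch: "/data/album" and the ".json" filter are made-up values):

Path dir("/data/album");
std::shared_ptr<Path::DirIterator> dir_itr = Path::DirIterator::OpenDirectory(&dir);
RETURN_UNEXPECTED_IF_NULL(dir_itr);  // OpenDirectory yields nullptr on failure
while (dir_itr->HasNext()) {
  Path file = dir_itr->Next();
  if (file.Extension() == ".json") {
    MS_LOG(INFO) << "found " << file.toString();
  }
}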
@@ -113,7 +113,7 @@ Status MindRecordOp::Init() {
       CHECK_FAIL_RETURN_UNEXPECTED(
         colname_to_ind.find(colname) != colname_to_ind.end(),
         "Invalid data, specified loading column name: " + colname + " does not exist in data file.");
-      RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->column(colname_to_ind[colname])));
+      RETURN_IF_NOT_OK(tmp_schema->AddColumn(data_schema_->Column(colname_to_ind[colname])));
     }
     data_schema_ = std::move(tmp_schema);
   }

@@ -271,8 +271,8 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint
     }

     std::shared_ptr<Tensor> tensor;
-    const ColDescriptor &column = data_schema_->column(i_col);
-    DataType type = column.type();
+    const ColDescriptor &column = data_schema_->Column(i_col);
+    DataType type = column.Type();

     // Set shape
     CHECK_FAIL_RETURN_UNEXPECTED(column_data_type_size != 0, "Found memory size of column data type is 0.");

@@ -280,9 +280,14 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint
     if (type == DataType::DE_STRING) {
       std::string s{data, data + n_bytes};
       RETURN_IF_NOT_OK(Tensor::CreateScalar(s, &tensor));
-    } else if (column.hasShape()) {
-      auto new_shape = TensorShape(column.shape());
-      RETURN_IF_NOT_OK(column.MaterializeTensorShape(static_cast<int32_t>(num_elements), &new_shape));
+    } else if (column.HasShape()) {
+      auto new_shape = TensorShape(column.Shape());
+      // if the numpy is null, create empty tensor shape
+      if (num_elements == 0) {
+        new_shape = TensorShape({});
+      } else {
+        RETURN_IF_NOT_OK(column.MaterializeTensorShape(static_cast<int32_t>(num_elements), &new_shape));
+      }
       RETURN_IF_NOT_OK(Tensor::CreateFromMemory(new_shape, type, data, &tensor));
     } else {
       std::vector<dsize_t> shapeDetails = {static_cast<dsize_t>(num_elements)};

@@ -220,7 +220,7 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la
       pixels[m] = (pixels[m] == 0) ? 0 : 255;
     }
     std::shared_ptr<Tensor> image;
-    RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->column(0).type(),
+    RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->Column(0).Type(),
                                               reinterpret_cast<unsigned char *>(pixels), &image));
     image_label_pairs_.emplace_back(std::make_pair(image, labels_buf[j]));
     image_path_.push_back(image_names_[index]);

@@ -265,8 +265,8 @@ Status MnistOp::WalkAllFiles() {
   std::string prefix;  // empty string, used to match usage = "" (default) or usage == "all"
   if (usage_ == "train" || usage_ == "test") prefix = (usage_ == "test" ? test_prefix : train_prefix);
   if (dir_it != nullptr) {
-    while (dir_it->hasNext()) {
-      Path file = dir_it->next();
+    while (dir_it->HasNext()) {
+      Path file = dir_it->Next();
       std::string fname = file.Basename();  // name of the mnist file
       if ((fname.find(prefix + "-images") != std::string::npos) && (fname.find(img_ext) != std::string::npos)) {
         image_names_.push_back(file.toString());

@@ -338,7 +338,7 @@ Status MnistOp::ComputeColMap() {
   // set the column name map (base class field)
   if (column_name_id_map_.empty()) {
     for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
-      column_name_id_map_[data_schema_->column(i).name()] = i;
+      column_name_id_map_[data_schema_->Column(i).Name()] = i;
     }
   } else {
     MS_LOG(WARNING) << "Column name map is already set!";

@@ -267,8 +267,8 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) {

   // Create a tensor for each column, then add the tensor to the row
   for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
-    const ColDescriptor current_col = data_schema_->column(i);
-    std::vector<dsize_t> current_shape = current_col.shape().AsVector();
+    const ColDescriptor current_col = data_schema_->Column(i);
+    std::vector<dsize_t> current_shape = current_col.Shape().AsVector();
     std::unique_ptr<TensorShape> new_shape = nullptr;
     std::unique_ptr<unsigned char[]> buf = nullptr;
     std::shared_ptr<Tensor> new_tensor = nullptr;

@@ -282,7 +282,7 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) {
     }

     new_shape = std::make_unique<TensorShape>(current_shape);
-    int64_t size_in_bytes = new_shape->NumOfElements() * current_col.type().SizeInBytes();
+    int64_t size_in_bytes = new_shape->NumOfElements() * current_col.Type().SizeInBytes();

     // Generate a random byte of data. This may cause some funny data for things like doubles,floats, bools
     // however the random data op is not too concerned about the physical data itself.

@@ -296,7 +296,7 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) {
       return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor.");
     }

-    RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.type(), buf.get(), &new_tensor));
+    RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.Type(), buf.get(), &new_tensor));

     // Add this tensor to the tensor row for output
     (*new_row).push_back(std::move(new_tensor));

@@ -75,7 +75,7 @@ Status SamplerRT::CreateSamplerTensor(std::shared_ptr<Tensor> *sample_ids, int64
     col_desc_ = std::make_unique<ColDescriptor>("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1);
   }
   TensorShape shape(std::vector<dsize_t>(1, num_elements));
-  RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, col_desc_->type(), sample_ids));
+  RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, col_desc_->Type(), sample_ids));
   return Status::OK();
 }

@@ -225,7 +225,7 @@ Status TextFileOp::ComputeColMap() {
   // Set the column name mapping (base class field)
   if (column_name_id_map_.empty()) {
     for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
-      column_name_id_map_[data_schema_->column(i).name()] = i;
+      column_name_id_map_[data_schema_->Column(i).Name()] = i;
     }
   } else {
     MS_LOG(WARNING) << "Column name map is already set!";

@@ -123,7 +123,7 @@ Status TFReaderOp::Init() {
   }

   if (total_rows_ == 0) {
-    total_rows_ = data_schema_->num_rows();
+    total_rows_ = data_schema_->NumRows();
   }
   if (total_rows_ < 0) {
     RETURN_STATUS_UNEXPECTED(

@@ -332,12 +332,12 @@ Status TFReaderOp::LoadFile(const std::string &filename, int64_t start_offset, i
 Status TFReaderOp::LoadExample(const dataengine::Example *tf_file, TensorRow *out_row) {
   int32_t num_columns = data_schema_->NumColumns();
   for (int32_t col = 0; col < num_columns; ++col) {
-    const ColDescriptor current_col = data_schema_->column(col);
+    const ColDescriptor current_col = data_schema_->Column(col);
     const dataengine::Features &example_features = tf_file->features();
     const google::protobuf::Map<std::string, dataengine::Feature> &feature_map = example_features.feature();
-    auto iter_column = feature_map.find(current_col.name());
+    auto iter_column = feature_map.find(current_col.Name());
     if (iter_column == feature_map.end()) {
-      RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.name() + " does not exist.");
+      RETURN_STATUS_UNEXPECTED("Invalid parameter, column name: " + current_col.Name() + " does not exist.");
     }
     const dataengine::Feature &column_values_list = iter_column->second;
     RETURN_IF_NOT_OK(LoadFeature(out_row, column_values_list, current_col, col));

@@ -379,7 +379,7 @@ Status TFReaderOp::LoadFeature(TensorRow *tensor_row, const dataengine::Feature
       // into the tensor
       TensorShape current_shape = TensorShape::CreateUnknownRankShape();
       RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(num_elements, &current_shape));
-      RETURN_IF_NOT_OK(Tensor::CreateFromMemory(current_shape, current_col.type(), data_ptr, &ts));
+      RETURN_IF_NOT_OK(Tensor::CreateFromMemory(current_shape, current_col.Type(), data_ptr, &ts));
       break;
     }
     case dataengine::Feature::KindCase::kInt64List: {

@@ -406,10 +406,10 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
   // kBytesList can map to the following DE types ONLY!
   // DE_UINT8, DE_INT8
   // Must be single byte type for each element!
-  if (current_col.type() != DataType::DE_UINT8 && current_col.type() != DataType::DE_INT8 &&
-      current_col.type() != DataType::DE_STRING) {
-    std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() +
-                          ", data type should be int8, uint8 or string, but got " + current_col.type().ToString();
+  if (current_col.Type() != DataType::DE_UINT8 && current_col.Type() != DataType::DE_INT8 &&
+      current_col.Type() != DataType::DE_STRING) {
+    std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() +
+                          ", data type should be int8, uint8 or string, but got " + current_col.Type().ToString();
     RETURN_STATUS_UNEXPECTED(err_msg);
   }

@@ -417,7 +417,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng

   *num_elements = bytes_list.value_size();

-  if (current_col.type() == DataType::DE_STRING) {
+  if (current_col.Type() == DataType::DE_STRING) {
     TensorShape shape = TensorShape::CreateScalar();
     RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &shape));
     RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, shape, tensor));

@@ -436,14 +436,14 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
   int64_t pad_size = max_size;

   // if user provides a shape in the form of [-1, d1, 2d, ... , dn], we need to pad to d1 * d2 * ... * dn
-  if (current_col.hasShape()) {
-    TensorShape cur_shape = current_col.shape();
+  if (current_col.HasShape()) {
+    TensorShape cur_shape = current_col.Shape();
     if (cur_shape.Size() >= 2 && cur_shape[0] == TensorShape::kDimUnknown) {
       int64_t new_pad_size = 1;
       for (int i = 1; i < cur_shape.Size(); ++i) {
         if (cur_shape[i] == TensorShape::kDimUnknown) {
           std::string err_msg =
-            "Invalid data, more than one unknown dimension in the shape of column: " + current_col.name();
+            "Invalid data, more than one unknown dimension in the shape of column: " + current_col.Name();
           RETURN_STATUS_UNEXPECTED(err_msg);
         }
         new_pad_size *= cur_shape[i];

@@ -451,7 +451,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
       pad_size = new_pad_size;
     } else {
       if (cur_shape.known() && cur_shape.NumOfElements() != max_size) {
-        std::string err_msg = "Shape in schema's column '" + current_col.name() + "' is incorrect." +
+        std::string err_msg = "Shape in schema's column '" + current_col.Name() + "' is incorrect." +
                               "\nshape received: " + cur_shape.ToString() +
                               "\ntotal elements in shape received: " + std::to_string(cur_shape.NumOfElements()) +
                               "\nexpected total elements in shape: " + std::to_string(max_size);

@@ -463,7 +463,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
   // know how many elements there are and the total bytes, create tensor here:
   TensorShape current_shape = TensorShape::CreateScalar();
   RETURN_IF_NOT_OK(current_col.MaterializeTensorShape((*num_elements) * pad_size, &current_shape));
-  RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, current_shape, current_col.type(), pad_size, tensor));
+  RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, current_shape, current_col.Type(), pad_size, tensor));

   return Status::OK();
 }
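
A worked example of the padding rule in LoadBytesList above (standalone, illustrative only): for a schema shape {-1, d1, ..., dn}, every byte-list element is padded to d1 * ... * dn bytes and the leading -1 is materialized later from the element count, so {-1, 2, 3} pads to 6 bytes per element; a second unknown dimension such as {-1, -1, 3} is rejected by the loop in the hunk.

#include <cstdint>
#include <vector>

// PadSize({-1, 2, 3}) == 6; the leading dimension is ignored here because
// LoadBytesList only multiplies the trailing (known) dimensions.
int64_t PadSize(const std::vector<int64_t> &shape) {
  int64_t pad = 1;
  for (size_t i = 1; i < shape.size(); ++i) pad *= shape[i];
  return pad;
}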
|
@ -472,9 +472,9 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor ¤t_col, const dataeng
|
|||
int32_t *num_elements, std::unique_ptr<float[]> *float_array) {
|
||||
// KFloatList can only map to DE types:
|
||||
// DE_FLOAT32
|
||||
if (current_col.type() != DataType::DE_FLOAT32) {
|
||||
std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() +
|
||||
", data type should be string, but got " + current_col.type().ToString();
|
||||
if (current_col.Type() != DataType::DE_FLOAT32) {
|
||||
std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() +
|
||||
", data type should be string, but got " + current_col.Type().ToString();
|
||||
RETURN_STATUS_UNEXPECTED(err_msg);
|
||||
}
|
||||
|
||||
|
@ -494,26 +494,26 @@ Status TFReaderOp::LoadFloatList(const ColDescriptor ¤t_col, const dataeng
|
|||
// Determines which template type to use and calls LoadIntList
|
||||
Status TFReaderOp::LoadIntListSwitch(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list,
|
||||
int32_t *num_elements, std::shared_ptr<Tensor> *tensor) {
|
||||
if (current_col.type() == DataType::DE_UINT64) {
|
||||
if (current_col.Type() == DataType::DE_UINT64) {
|
||||
RETURN_IF_NOT_OK(LoadIntList<uint64_t>(current_col, column_values_list, num_elements, tensor));
|
||||
} else if (current_col.type() == DataType::DE_INT64) {
|
||||
} else if (current_col.Type() == DataType::DE_INT64) {
|
||||
RETURN_IF_NOT_OK(LoadIntList<int64_t>(current_col, column_values_list, num_elements, tensor));
|
||||
} else if (current_col.type() == DataType::DE_UINT32) {
|
||||
} else if (current_col.Type() == DataType::DE_UINT32) {
|
||||
RETURN_IF_NOT_OK(LoadIntList<uint32_t>(current_col, column_values_list, num_elements, tensor));
|
||||
} else if (current_col.type() == DataType::DE_INT32) {
|
||||
} else if (current_col.Type() == DataType::DE_INT32) {
|
||||
RETURN_IF_NOT_OK(LoadIntList<int32_t>(current_col, column_values_list, num_elements, tensor));
|
||||
} else if (current_col.type() == DataType::DE_UINT16) {
|
||||
} else if (current_col.Type() == DataType::DE_UINT16) {
|
||||
RETURN_IF_NOT_OK(LoadIntList<uint16_t>(current_col, column_values_list, num_elements, tensor));
|
||||
} else if (current_col.type() == DataType::DE_INT16) {
|
||||
} else if (current_col.Type() == DataType::DE_INT16) {
|
||||
RETURN_IF_NOT_OK(LoadIntList<int16_t>(current_col, column_values_list, num_elements, tensor));
|
||||
} else if (current_col.type() == DataType::DE_UINT8) {
|
||||
} else if (current_col.Type() == DataType::DE_UINT8) {
|
||||
RETURN_IF_NOT_OK(LoadIntList<uint8_t>(current_col, column_values_list, num_elements, tensor));
|
||||
} else if (current_col.type() == DataType::DE_INT8) {
|
||||
} else if (current_col.Type() == DataType::DE_INT8) {
|
||||
RETURN_IF_NOT_OK(LoadIntList<int8_t>(current_col, column_values_list, num_elements, tensor));
|
||||
} else {
|
||||
std::string err_msg = "Invalid data, invalid datatype for Tensor at column: " + current_col.name() +
|
||||
std::string err_msg = "Invalid data, invalid datatype for Tensor at column: " + current_col.Name() +
|
||||
", data type should be uint64, int64, uint32, int32, uint16, int16, uint8 or int8" +
|
||||
", but got " + current_col.type().ToString();
|
||||
", but got " + current_col.Type().ToString();
|
||||
RETURN_STATUS_UNEXPECTED(err_msg);
|
||||
}
|
||||
|
||||
|
@ -525,9 +525,9 @@ Status TFReaderOp::LoadIntListSwitch(const ColDescriptor ¤t_col, const dat
|
|||
template <typename T>
|
||||
Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengine::Feature &column_values_list,
|
||||
int32_t *num_elements, std::shared_ptr<Tensor> *tensor) {
|
||||
if (!(current_col.type().IsInt())) {
|
||||
std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.name() +
|
||||
", data type should be int, but got " + current_col.type().ToString();
|
||||
if (!(current_col.Type().IsInt())) {
|
||||
std::string err_msg = "Invalid data, invalid data type for Tensor at column: " + current_col.Name() +
|
||||
", data type should be int, but got " + current_col.Type().ToString();
|
||||
RETURN_STATUS_UNEXPECTED(err_msg);
|
||||
}
|
||||
|
||||
|
@ -540,7 +540,7 @@ Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengin
|
|||
// know how many elements there are, create tensor here:
|
||||
TensorShape current_shape = TensorShape::CreateUnknownRankShape();
|
||||
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, ¤t_shape));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateEmpty(current_shape, current_col.type(), tensor));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateEmpty(current_shape, current_col.Type(), tensor));
|
||||
|
||||
int64_t i = 0;
|
||||
auto it = (*tensor)->begin<T>();
|
||||
|
@ -719,7 +719,7 @@ Status TFReaderOp::ComputeColMap() {
|
|||
// Construct the column name map for this operator (base class field)
|
||||
if (column_name_id_map_.empty()) {
|
||||
for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
|
||||
column_name_id_map_[data_schema_->column(i).name()] = i;
|
||||
column_name_id_map_[data_schema_->Column(i).Name()] = i;
|
||||
}
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Column name map is already set!";
|
||||
|
|
|
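
The ComputeColMap() bodies touched throughout this commit are the same loop repeated once per reader. A shared helper along these lines (hypothetical, not part of this commit; it only uses the NumColumns/Column/Name accessors introduced above) would remove the duplication:

Status BuildColumnNameMap(const DataSchema &schema,
                          std::unordered_map<std::string, int32_t> *map) {
  RETURN_UNEXPECTED_IF_NULL(map);
  if (!map->empty()) {
    MS_LOG(WARNING) << "Column name map is already set!";
    return Status::OK();
  }
  for (int32_t i = 0; i < schema.NumColumns(); ++i) {
    (*map)[schema.Column(i).Name()] = i;
  }
  return Status::OK();
}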
@@ -133,8 +133,8 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
     std::shared_ptr<Tensor> image, target;
     const std::string kTargetFile =
       folder_path_ + std::string(kSegmentationClassFolder) + image_id + std::string(kSegmentationExtension);
-    RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image));
-    RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->column(1), &target));
+    RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
+    RETURN_IF_NOT_OK(ReadImageToTensor(kTargetFile, data_schema_->Column(1), &target));
     (*trow) = TensorRow(row_id, {std::move(image), std::move(target)});
     path_list = {kImageFile, kTargetFile};
   } else if (task_type_ == TaskType::Detection) {

@@ -142,7 +142,7 @@ Status VOCOp::LoadTensorRow(row_id_type row_id, TensorRow *trow) {
     TensorRow annotation;
     const std::string kAnnotationFile =
       folder_path_ + std::string(kAnnotationsFolder) + image_id + std::string(kAnnotationExtension);
-    RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->column(0), &image));
+    RETURN_IF_NOT_OK(ReadImageToTensor(kImageFile, data_schema_->Column(0), &image));
     RETURN_IF_NOT_OK(ReadAnnotationToTensor(kAnnotationFile, &annotation));
     trow->setId(row_id);
     trow->push_back(std::move(image));

@@ -406,7 +406,7 @@ Status VOCOp::ComputeColMap() {
   // Set the column name map (base class field)
   if (column_name_id_map_.empty()) {
     for (int32_t i = 0; i < data_schema_->NumColumns(); ++i) {
-      column_name_id_map_[data_schema_->column(i).name()] = i;
+      column_name_id_map_[data_schema_->Column(i).Name()] = i;
     }
   } else {
     MS_LOG(WARNING) << "Column name map is already set!";

@@ -62,6 +62,7 @@ ExecutionTree::~ExecutionTree() {
 // provides it with a link to the tree. A node cannot form any relationships (parent/child) with
 // other nodes unless they are associated with the same tree.
 Status ExecutionTree::AssociateNode(const std::shared_ptr<DatasetOp> &op) {
+  RETURN_UNEXPECTED_IF_NULL(op);
   // If we are already a part of the tree, no-op
   if (op->tree_ == this) {
     return Status::OK();

@@ -88,6 +89,7 @@ Status ExecutionTree::AssociateNode(const std::shared_ptr<DatasetOp> &op) {

 // Sets the root node of the tree
 Status ExecutionTree::AssignRoot(const std::shared_ptr<DatasetOp> &op) {
+  RETURN_UNEXPECTED_IF_NULL(op);
   // Tree must be in building state before we can assign root to it
   if (tree_state_ != kDeTStateBuilding) {
     std::string err_msg =

@@ -121,6 +123,9 @@ void ExecutionTree::Print(std::ostream &out, const std::shared_ptr<DatasetOp> &o
 // A helper functions for doing the recursive printing
 void ExecutionTree::PrintNode(std::ostream &out, const std::shared_ptr<DatasetOp> &dataset_op, std::string indent,
                               bool last, bool detailed) const {
+  if (dataset_op == nullptr) {
+    return;
+  }
   // Decide which printer to use based on detailed arg.
   if (!detailed) {
     out << indent << "+- " << *dataset_op;
@ -41,6 +41,7 @@ GraphDataImpl::GraphDataImpl(std::string dataset_file, int32_t num_workers, bool
GraphDataImpl::~GraphDataImpl() {}

Status GraphDataImpl::GetAllNodes(NodeType node_type, std::shared_ptr<Tensor> *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  auto itr = node_type_map_.find(node_type);
  if (itr == node_type_map_.end()) {
    std::string err_msg = "Invalid node type:" + std::to_string(node_type);

@ -54,6 +55,7 @@ Status GraphDataImpl::GetAllNodes(NodeType node_type, std::shared_ptr<Tensor> *o
template <typename T>
Status GraphDataImpl::CreateTensorByVector(const std::vector<std::vector<T>> &data, DataType type,
                                           std::shared_ptr<Tensor> *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  if (!type.IsCompatible<T>()) {
    RETURN_STATUS_UNEXPECTED("Data type not compatible");
  }

@ -96,6 +98,7 @@ Status GraphDataImpl::ComplementVector(std::vector<std::vector<T>> *data, size_t
}

Status GraphDataImpl::GetAllEdges(EdgeType edge_type, std::shared_ptr<Tensor> *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  auto itr = edge_type_map_.find(edge_type);
  if (itr == edge_type_map_.end()) {
    std::string err_msg = "Invalid edge type:" + std::to_string(edge_type);

@ -110,6 +113,7 @@ Status GraphDataImpl::GetNodesFromEdges(const std::vector<EdgeIdType> &edge_list
  if (edge_list.empty()) {
    RETURN_STATUS_UNEXPECTED("Input edge_list is empty");
  }
  RETURN_UNEXPECTED_IF_NULL(out);

  std::vector<std::vector<NodeIdType>> node_list;
  node_list.reserve(edge_list.size());

@ -156,6 +160,7 @@ Status GraphDataImpl::GetAllNeighbors(const std::vector<NodeIdType> &node_list,
                                      const OutputFormat &format, std::shared_ptr<Tensor> *out) {
  CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty.");
  RETURN_IF_NOT_OK(CheckNeighborType(neighbor_type));
  RETURN_UNEXPECTED_IF_NULL(out);

  std::vector<std::vector<NodeIdType>> neighbors;

@ -251,6 +256,7 @@ Status GraphDataImpl::GetSampledNeighbors(const std::vector<NodeIdType> &node_li
  for (const auto &type : neighbor_types) {
    RETURN_IF_NOT_OK(CheckNeighborType(type));
  }
  RETURN_UNEXPECTED_IF_NULL(out);
  std::vector<std::vector<NodeIdType>> neighbors_vec(node_list.size());
  for (size_t node_idx = 0; node_idx < node_list.size(); ++node_idx) {
    std::shared_ptr<Node> input_node;

@ -285,6 +291,7 @@ Status GraphDataImpl::NegativeSample(const std::vector<NodeIdType> &data, const
                                     size_t *start_index, const std::unordered_set<NodeIdType> &exclude_data,
                                     int32_t samples_num, std::vector<NodeIdType> *out_samples) {
  CHECK_FAIL_RETURN_UNEXPECTED(!data.empty(), "Input data is empty.");
  RETURN_UNEXPECTED_IF_NULL(start_index);
  size_t index = *start_index;
  for (size_t i = index; i < shuffled_ids.size(); ++i) {
    ++index;

@ -305,6 +312,7 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector<NodeIdType> &node
  CHECK_FAIL_RETURN_UNEXPECTED(!node_list.empty(), "Input node_list is empty.");
  RETURN_IF_NOT_OK(CheckSamplesNum(samples_num));
  RETURN_IF_NOT_OK(CheckNeighborType(neg_neighbor_type));
  RETURN_UNEXPECTED_IF_NULL(out);

  const std::vector<NodeIdType> &all_nodes = node_type_map_[neg_neighbor_type];
  std::vector<NodeIdType> shuffled_id(all_nodes.size());

@ -321,9 +329,9 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector<NodeIdType> &node
      std::vector<NodeIdType> neighbors;
      RETURN_IF_NOT_OK(node->GetAllNeighbors(neg_neighbor_type, &neighbors));
      std::unordered_set<NodeIdType> exclude_nodes;
      std::transform(neighbors.begin(), neighbors.end(),
                     std::insert_iterator<std::unordered_set<NodeIdType>>(exclude_nodes, exclude_nodes.begin()),
                     [](const NodeIdType node) { return node; });
      (void)std::transform(neighbors.begin(), neighbors.end(),
                           std::insert_iterator<std::unordered_set<NodeIdType>>(exclude_nodes, exclude_nodes.begin()),
                           [](const NodeIdType node) { return node; });
      neg_neighbors_vec[node_idx].emplace_back(node->id());
      if (all_nodes.size() > exclude_nodes.size()) {
        while (neg_neighbors_vec[node_idx].size() < samples_num + 1) {

@ -355,6 +363,7 @@ Status GraphDataImpl::GetNegSampledNeighbors(const std::vector<NodeIdType> &node
Status GraphDataImpl::RandomWalk(const std::vector<NodeIdType> &node_list, const std::vector<NodeType> &meta_path,
                                 float step_home_param, float step_away_param, NodeIdType default_node,
                                 std::shared_ptr<Tensor> *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  RETURN_IF_NOT_OK(random_walk_.Build(node_list, meta_path, step_home_param, step_away_param, default_node));
  std::vector<std::vector<NodeIdType>> walks;
  RETURN_IF_NOT_OK(random_walk_.SimulateWalk(&walks));

@ -363,6 +372,7 @@ Status GraphDataImpl::RandomWalk(const std::vector<NodeIdType> &node_list, const
}

Status GraphDataImpl::GetNodeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) {
  RETURN_UNEXPECTED_IF_NULL(out_feature);
  auto itr = default_node_feature_map_.find(feature_type);
  if (itr == default_node_feature_map_.end()) {
    std::string err_msg = "Invalid feature type:" + std::to_string(feature_type);

@ -374,6 +384,7 @@ Status GraphDataImpl::GetNodeDefaultFeature(FeatureType feature_type, std::share
}

Status GraphDataImpl::GetEdgeDefaultFeature(FeatureType feature_type, std::shared_ptr<Feature> *out_feature) {
  RETURN_UNEXPECTED_IF_NULL(out_feature);
  auto itr = default_edge_feature_map_.find(feature_type);
  if (itr == default_edge_feature_map_.end()) {
    std::string err_msg = "Invalid feature type:" + std::to_string(feature_type);

@ -390,6 +401,7 @@ Status GraphDataImpl::GetNodeFeature(const std::shared_ptr<Tensor> &nodes,
    RETURN_STATUS_UNEXPECTED("Input nodes is empty");
  }
  CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty");
  RETURN_UNEXPECTED_IF_NULL(out);
  TensorRow tensors;
  for (const auto &f_type : feature_types) {
    std::shared_ptr<Feature> default_feature;

@ -436,6 +448,7 @@ Status GraphDataImpl::GetNodeFeatureSharedMemory(const std::shared_ptr<Tensor> &
  if (!nodes || nodes->Size() == 0) {
    RETURN_STATUS_UNEXPECTED("Input nodes is empty");
  }
  RETURN_UNEXPECTED_IF_NULL(out);
  TensorShape shape = nodes->shape().AppendDim(2);
  std::shared_ptr<Tensor> fea_tensor;
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, DataType(DataType::DE_INT64), &fea_tensor));

@ -478,6 +491,7 @@ Status GraphDataImpl::GetEdgeFeature(const std::shared_ptr<Tensor> &edges,
    RETURN_STATUS_UNEXPECTED("Input edges is empty");
  }
  CHECK_FAIL_RETURN_UNEXPECTED(!feature_types.empty(), "Input feature_types is empty");
  RETURN_UNEXPECTED_IF_NULL(out);
  TensorRow tensors;
  for (const auto &f_type : feature_types) {
    std::shared_ptr<Feature> default_feature;

@ -520,6 +534,7 @@ Status GraphDataImpl::GetEdgeFeatureSharedMemory(const std::shared_ptr<Tensor> &
  if (!edges || edges->Size() == 0) {
    RETURN_STATUS_UNEXPECTED("Input edges is empty");
  }
  RETURN_UNEXPECTED_IF_NULL(out);
  TensorShape shape = edges->shape().AppendDim(2);
  std::shared_ptr<Tensor> fea_tensor;
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, DataType(DataType::DE_INT64), &fea_tensor));

@ -554,14 +569,15 @@ Status GraphDataImpl::Init() {
}

Status GraphDataImpl::GetMetaInfo(MetaInfo *meta_info) {
  RETURN_UNEXPECTED_IF_NULL(meta_info);
  meta_info->node_type.resize(node_type_map_.size());
  std::transform(node_type_map_.begin(), node_type_map_.end(), meta_info->node_type.begin(),
                 [](auto itr) { return itr.first; });
  (void)std::transform(node_type_map_.begin(), node_type_map_.end(), meta_info->node_type.begin(),
                       [](auto itr) { return itr.first; });
  std::sort(meta_info->node_type.begin(), meta_info->node_type.end());

  meta_info->edge_type.resize(edge_type_map_.size());
  std::transform(edge_type_map_.begin(), edge_type_map_.end(), meta_info->edge_type.begin(),
                 [](auto itr) { return itr.first; });
  (void)std::transform(edge_type_map_.begin(), edge_type_map_.end(), meta_info->edge_type.begin(),
                       [](auto itr) { return itr.first; });
  std::sort(meta_info->edge_type.begin(), meta_info->edge_type.end());

  for (const auto &node : node_type_map_) {

@ -594,6 +610,7 @@ Status GraphDataImpl::GetMetaInfo(MetaInfo *meta_info) {

#ifdef ENABLE_PYTHON
Status GraphDataImpl::GraphInfo(py::dict *out) {
  RETURN_UNEXPECTED_IF_NULL(out);
  MetaInfo meta_info;
  RETURN_IF_NOT_OK(GetMetaInfo(&meta_info));
  (*out)["node_type"] = py::cast(meta_info.node_type);

@ -616,6 +633,7 @@ Status GraphDataImpl::LoadNodeAndEdge() {
}

Status GraphDataImpl::GetNodeByNodeId(NodeIdType id, std::shared_ptr<Node> *node) {
  RETURN_UNEXPECTED_IF_NULL(node);
  auto itr = node_id_map_.find(id);
  if (itr == node_id_map_.end()) {
    std::string err_msg = "Invalid node id:" + std::to_string(id);

@ -627,6 +645,7 @@ Status GraphDataImpl::GetNodeByNodeId(NodeIdType id, std::shared_ptr<Node> *node
}

Status GraphDataImpl::GetEdgeByEdgeId(EdgeIdType id, std::shared_ptr<Edge> *edge) {
  RETURN_UNEXPECTED_IF_NULL(edge);
  auto itr = edge_id_map_.find(id);
  if (itr == edge_id_map_.end()) {
    std::string err_msg = "Invalid edge id:" + std::to_string(id);

@ -682,6 +701,7 @@ Status GraphDataImpl::RandomWalkBase::Build(const std::vector<NodeIdType> &node_
}

Status GraphDataImpl::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node, std::vector<NodeIdType> *walk_path) {
  RETURN_UNEXPECTED_IF_NULL(walk_path);
  // Simulate a random walk starting from start node.
  auto walk = std::vector<NodeIdType>(1, start_node);  // walk is a vector
  // walk simulate

@ -722,6 +742,7 @@ Status GraphDataImpl::RandomWalkBase::Node2vecWalk(const NodeIdType &start_node,
}

Status GraphDataImpl::RandomWalkBase::SimulateWalk(std::vector<std::vector<NodeIdType>> *walks) {
  RETURN_UNEXPECTED_IF_NULL(walks);
  for (int32_t i = 0; i < num_walks_; ++i) {
    for (const auto &node : node_list_) {
      std::vector<NodeIdType> walk;

@ -734,6 +755,7 @@ Status GraphDataImpl::RandomWalkBase::SimulateWalk(std::vector<std::vector<NodeI

Status GraphDataImpl::RandomWalkBase::GetNodeProbability(const NodeIdType &node_id, const NodeType &node_type,
                                                         std::shared_ptr<StochasticIndex> *node_probability) {
  RETURN_UNEXPECTED_IF_NULL(node_probability);
  // Generate alias nodes
  std::shared_ptr<Node> node;
  RETURN_IF_NOT_OK(graph_->GetNodeByNodeId(node_id, &node));

@ -749,6 +771,7 @@ Status GraphDataImpl::RandomWalkBase::GetNodeProbability(const NodeIdType &node_
Status GraphDataImpl::RandomWalkBase::GetEdgeProbability(const NodeIdType &src, const NodeIdType &dst,
                                                         uint32_t meta_path_index,
                                                         std::shared_ptr<StochasticIndex> *edge_probability) {
  RETURN_UNEXPECTED_IF_NULL(edge_probability);
  // Get the alias edge setup lists for a given edge.
  std::shared_ptr<Node> src_node;
  RETURN_IF_NOT_OK(graph_->GetNodeByNodeId(src, &src_node));

@ -760,6 +783,8 @@ Status GraphDataImpl::RandomWalkBase::GetEdgeProbability(const NodeIdType &src,
  std::vector<NodeIdType> dst_neighbors;
  RETURN_IF_NOT_OK(dst_node->GetAllNeighbors(meta_path_[meta_path_index + 1], &dst_neighbors, true));

  CHECK_FAIL_RETURN_UNEXPECTED(step_home_param_ != 0, "Invalid data, step home parameter can't be zero.");
  CHECK_FAIL_RETURN_UNEXPECTED(step_away_param_ != 0, "Invalid data, step away parameter can't be zero.");
  std::sort(dst_neighbors.begin(), dst_neighbors.end());
  std::vector<float> non_normalized_probability;
  for (const auto &dst_nbr : dst_neighbors) {
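Reviewer note: the (void)std::transform change in GetNegSampledNeighbors only silences the discarded iterator return value; the underlying pattern is copying a neighbor list into an unordered_set so shuffled candidates can be rejected in O(1). A small self-contained sketch of that exclude-set idiom:

// Sketch of the exclude-set idiom: copy known neighbors into a hash set,
// then draw shuffled candidates that are not in the set.
#include <algorithm>
#include <iostream>
#include <iterator>
#include <unordered_set>
#include <vector>

int main() {
  std::vector<int> neighbors = {2, 3, 5};
  std::unordered_set<int> exclude_nodes;
  // Cast to void to show the iterator return value is intentionally unused.
  (void)std::transform(neighbors.begin(), neighbors.end(),
                       std::inserter(exclude_nodes, exclude_nodes.begin()),
                       [](int node) { return node; });
  std::vector<int> shuffled_ids = {5, 1, 3, 4, 2, 6};
  std::vector<int> negatives;
  const std::size_t samples_num = 2;
  for (int id : shuffled_ids) {
    if (negatives.size() >= samples_num) break;
    if (exclude_nodes.find(id) == exclude_nodes.end()) negatives.push_back(id);
  }
  for (int id : negatives) std::cout << id << ' ';  // prints "1 4"
  return 0;
}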
@ -17,6 +17,8 @@
#include "minddata/dataset/engine/gnn/graph_shared_memory.h"

#include <string>
#include "debug/common.h"
#include "utils/ms_utils.h"
#include "minddata/dataset/util/log_adapter.h"

namespace mindspore {

@ -51,7 +53,9 @@ GraphSharedMemory::~GraphSharedMemory() {
Status GraphSharedMemory::CreateSharedMemory() {
  if (memory_key_ == -1) {
    // ftok to generate unique key
    memory_key_ = ftok(mr_file_.data(), kGnnSharedMemoryId);
    auto realpath = Common::GetRealPath(mr_file_);
    CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(), "Get real path failed, path=" + mr_file_);
    memory_key_ = ftok(common::SafeCStr(realpath.value()), kGnnSharedMemoryId);
    CHECK_FAIL_RETURN_UNEXPECTED(memory_key_ != -1, "Failed to get key of shared memory. file_name:" + mr_file_);
    std::stringstream stream;
    stream << std::hex << memory_key_;

@ -89,6 +93,7 @@ Status GraphSharedMemory::DeleteSharedMemory() {

Status GraphSharedMemory::SharedMemoryImpl(const int &shmflg) {
  // shmget returns an identifier in shmid
  CHECK_FAIL_RETURN_UNEXPECTED(memory_size_ >= 0, "Invalid memory size, should be greater than zero.");
  int shmid = shmget(memory_key_, memory_size_, shmflg);
  CHECK_FAIL_RETURN_UNEXPECTED(shmid != -1, "Failed to get shared memory. key=0x" + memory_key_str_);

@ -103,6 +108,7 @@ Status GraphSharedMemory::SharedMemoryImpl(const int &shmflg) {
Status GraphSharedMemory::InsertData(const uint8_t *data, int64_t len, int64_t *offset) {
  CHECK_FAIL_RETURN_UNEXPECTED(data, "Input data is nullptr.");
  CHECK_FAIL_RETURN_UNEXPECTED(len > 0, "Input len is invalid.");
  CHECK_FAIL_RETURN_UNEXPECTED(offset, "Input offset is nullptr.");

  std::lock_guard<std::mutex> lck(mutex_);
  CHECK_FAIL_RETURN_UNEXPECTED((memory_size_ - memory_offset_ >= len),
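Reviewer note: the CreateSharedMemory change resolves the file to a real path before calling ftok, so a relative or symlinked path cannot yield a different IPC key. A hedged POSIX sketch of that sequence; the project id 'g' and the 4 KiB size are illustrative, not the kGnnSharedMemoryId constant used above:

// Sketch: canonicalize a path, derive a System V IPC key from it, then
// create (or attach to) a shared memory segment. Linux/POSIX only.
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <sys/ipc.h>
#include <sys/shm.h>

int main(int argc, char **argv) {
  if (argc < 2) return 1;
  char resolved[PATH_MAX];
  if (realpath(argv[1], resolved) == nullptr) {  // fails if the file does not exist
    perror("realpath");
    return 1;
  }
  key_t key = ftok(resolved, 'g');  // project id must be non-zero
  if (key == -1) {
    perror("ftok");
    return 1;
  }
  int shmid = shmget(key, 4096, IPC_CREAT | 0600);  // 4 KiB segment
  if (shmid == -1) {
    perror("shmget");
    return 1;
  }
  std::printf("key=0x%x shmid=%d\n", static_cast<unsigned>(key), shmid);
  return 0;
}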
@ -46,6 +46,7 @@ class GpuItemConnector : public Connector<std::vector<device::DataItemGpu>> {
  }

  Status Pop(int32_t worker_id, std::vector<device::DataItemGpu> *result) noexcept override {
    RETURN_UNEXPECTED_IF_NULL(result);
    {
      MS_ASSERT(worker_id < num_consumers_);
      std::unique_lock<std::mutex> lock(m_);
@ -30,6 +30,7 @@ namespace dataset {
// Helper function to compute a default shuffle size
Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
                          int64_t *shuffle_size) {
  RETURN_UNEXPECTED_IF_NULL(shuffle_size);
  const int64_t average_files_multiplier = 4;
  const int64_t shuffle_max = 10000;
  int64_t avg_rows_per_file = 0;

@ -59,6 +60,7 @@ Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_ro
// Helper function to inject a shuffle operator over top of current operator being built
Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
                    int32_t connector_que_size, std::shared_ptr<DatasetOp> *shuffle_op) {
  RETURN_UNEXPECTED_IF_NULL(shuffle_op);
  int64_t shuffle_size = 0;
  RETURN_IF_NOT_OK(ComputeShuffleSize(num_files, num_devices, num_rows, total_rows, &shuffle_size));
  MS_LOG(INFO) << "Dataset::AddShuffleOp - num_rows: " << num_rows << ", shuffle_size: " << shuffle_size;
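For context on the constants in ComputeShuffleSize: a plausible reading (not verified against the full function body, which this diff elides) is that the shuffle buffer holds roughly four average-sized files' worth of rows per device, capped at 10000. A sketch under that stated assumption; the formula here is illustrative and may differ from the real one:

// Hedged sketch of the shuffle-size heuristic suggested by the constants
// average_files_multiplier and shuffle_max in ComputeShuffleSize.
#include <algorithm>
#include <cstdint>
#include <iostream>

int64_t ComputeShuffleSizeSketch(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows) {
  const int64_t average_files_multiplier = 4;
  const int64_t shuffle_max = 10000;
  if (total_rows > 0) num_rows = total_rows;       // an explicit row cap wins
  if (num_files == 0 || num_devices == 0) return 0;  // guard the divisions below
  int64_t avg_rows_per_file = num_rows / num_files;
  // Buffer about four files' worth of rows per device, bounded above.
  return std::min(avg_rows_per_file * average_files_multiplier / num_devices, shuffle_max);
}

int main() {
  std::cout << ComputeShuffleSizeSketch(8, 1, 80000, 0) << '\n';  // 10000 (capped)
  std::cout << ComputeShuffleSizeSketch(8, 1, 8000, 0) << '\n';   // 4000
  return 0;
}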
@ -56,6 +56,7 @@ void MapNode::Print(std::ostream &out) const {
}

Status MapNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
  RETURN_UNEXPECTED_IF_NULL(node_ops);
  std::vector<std::shared_ptr<TensorOp>> tensor_ops;

  // Build tensorOp from tensorOperation vector

@ -128,12 +129,16 @@ Status MapNode::ValidateParams() {

// Visitor accepting method for IRNodePass
Status MapNode::Accept(IRNodePass *const p, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(p);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // Downcast shared pointer then call visitor
  return p->Visit(shared_from_base<MapNode>(), modified);
}

// Visitor accepting method for IRNodePass
Status MapNode::AcceptAfter(IRNodePass *const p, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(p);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // Downcast shared pointer then call visitor
  return p->VisitAfter(shared_from_base<MapNode>(), modified);
}

@ -144,6 +149,7 @@ void MapNode::setOperations(const std::vector<std::shared_ptr<TensorOperation>>
std::vector<std::shared_ptr<TensorOperation>> MapNode::operations() { return operations_; }

Status MapNode::to_json(nlohmann::json *out_json) {
  RETURN_UNEXPECTED_IF_NULL(out_json);
  nlohmann::json args;
  args["num_parallel_workers"] = num_workers_;
  args["input_columns"] = input_columns_;

@ -158,6 +164,7 @@ Status MapNode::to_json(nlohmann::json *out_json) {
  std::vector<nlohmann::json> ops;
  std::vector<int32_t> cbs;
  for (auto op : operations_) {
    RETURN_UNEXPECTED_IF_NULL(op);
    nlohmann::json op_args;
    RETURN_IF_NOT_OK(op->to_json(&op_args));
    if (op->Name() == "PyFuncOp") {

@ -170,8 +177,8 @@ Status MapNode::to_json(nlohmann::json *out_json) {
    }
  }
  args["operations"] = ops;
  std::transform(callbacks_.begin(), callbacks_.end(), std::back_inserter(cbs),
                 [](std::shared_ptr<DSCallback> cb) -> int32_t { return cb->step_size(); });
  (void)std::transform(callbacks_.begin(), callbacks_.end(), std::back_inserter(cbs),
                       [](std::shared_ptr<DSCallback> cb) -> int32_t { return cb != nullptr ? cb->step_size() : 0; });
  args["callback"] = cbs;
  *out_json = args;
  return Status::OK();
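Reviewer note: the to_json changes guard every shared_ptr before dereferencing it, substituting 0 for null callbacks. A compact illustration of the same pattern with nlohmann::json (the Callback type and field names here are illustrative):

// Sketch: serialize a node with nullable callbacks into nlohmann::json the
// way the patched MapNode::to_json does.
#include <algorithm>
#include <iostream>
#include <iterator>
#include <memory>
#include <vector>
#include <nlohmann/json.hpp>

struct Callback {
  int step_size() const { return 32; }
};

int main() {
  std::vector<std::shared_ptr<Callback>> callbacks = {std::make_shared<Callback>(), nullptr};
  nlohmann::json args;
  args["num_parallel_workers"] = 4;
  std::vector<int> cbs;
  (void)std::transform(callbacks.begin(), callbacks.end(), std::back_inserter(cbs),
                       [](const std::shared_ptr<Callback> &cb) { return cb != nullptr ? cb->step_size() : 0; });
  args["callback"] = cbs;
  std::cout << args.dump() << '\n';  // {"callback":[32,0],"num_parallel_workers":4}
  return 0;
}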
@ -106,8 +106,8 @@ Status AlbumNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_
  }
  std::set<std::string> extensions = {".json", ".JSON"};

  while (dirItr->hasNext()) {
    Path file = dirItr->next();
  while (dirItr->HasNext()) {
    Path file = dirItr->Next();
    if (extensions.empty() || extensions.find(file.Extension()) != extensions.end()) {
      num_rows += 1;
    }
@ -73,9 +73,9 @@ Status GeneratorNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_
    RETURN_IF_NOT_OK(data_schema->LoadSchemaString(schema_json_string, {}));

    for (int32_t i = 0; i < data_schema->NumColumns(); i++) {
      ColDescriptor col = data_schema->column(i);
      column_names_.push_back(col.name());
      column_types_.push_back((col.type()));
      ColDescriptor col = data_schema->Column(i);
      column_names_.push_back(col.Name());
      column_types_.push_back((col.Type()));
    }
  }
  std::shared_ptr<SamplerRT> sampler_rt = nullptr;
@ -131,7 +131,7 @@ Status RandomNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size
    *dataset_size = dataset_size_;
    return Status::OK();
  }
  int64_t num_rows = total_rows_ != 0 ? total_rows_ : data_schema_->num_rows();
  int64_t num_rows = total_rows_ != 0 ? total_rows_ : data_schema_->NumRows();
  *dataset_size = num_rows;
  dataset_size_ = *dataset_size;
  return Status::OK();
@ -22,6 +22,7 @@
#include <utility>
#include <vector>

#include "debug/common.h"
#include "minddata/dataset/engine/datasetops/source/tf_reader_op.h"
#include "minddata/dataset/engine/jagged_connector.h"
#include "minddata/dataset/engine/opt/pass.h"

@ -58,13 +59,9 @@ Status TFRecordNode::ValidateParams() {
  }

  for (const auto &f : dataset_files_) {
    Path dataset_file(f);
    if (!dataset_file.Exists()) {
      std::string err_msg = "TFRecordNode: dataset file: [" + f + "] is invalid or does not exist.";
      MS_LOG(ERROR) << err_msg;

      return Status(StatusCode::kMDSyntaxError, __LINE__, __FILE__, err_msg);
    }
    auto realpath = Common::GetRealPath(f);
    CHECK_FAIL_RETURN_UNEXPECTED(realpath.has_value(),
                                 "TFRecordNode: dataset file: [" + f + "] is invalid or does not exist.");
  }

  if (num_samples_ < 0) {

@ -107,6 +104,7 @@ Status TFRecordNode::ValidateParams() {

// Function to build TFRecordNode
Status TFRecordNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
  RETURN_UNEXPECTED_IF_NULL(node_ops);
  // Sort the datasets file in a lexicographical order
  std::vector<std::string> sorted_dir_files = dataset_files_;
  std::sort(sorted_dir_files.begin(), sorted_dir_files.end());

@ -165,6 +163,8 @@ Status TFRecordNode::GetShardId(int32_t *const shard_id) {
// Get Dataset size
Status TFRecordNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &size_getter, bool estimate,
                                    int64_t *dataset_size) {
  RETURN_UNEXPECTED_IF_NULL(size_getter);
  RETURN_UNEXPECTED_IF_NULL(dataset_size);
  if (dataset_size_ > 0) {
    *dataset_size = dataset_size_;
    return Status::OK();

@ -189,6 +189,7 @@ Status TFRecordNode::GetDatasetSize(const std::shared_ptr<DatasetSizeGetter> &si

// Get the file list of the specific shard ID
Status TFRecordNode::GetShardFileList(std::vector<std::string> *shard_filenames) {
  RETURN_UNEXPECTED_IF_NULL(shard_filenames);
  if (!shard_filenames->empty()) {
    RETURN_STATUS_UNEXPECTED("The initial file list must be empty.");
  }

@ -201,6 +202,7 @@ Status TFRecordNode::GetShardFileList(std::vector<std::string> *shard_filenames)
}

Status TFRecordNode::to_json(nlohmann::json *out_json) {
  RETURN_UNEXPECTED_IF_NULL(out_json);
  nlohmann::json args;
  args["num_parallel_workers"] = num_workers_;
  args["dataset_files"] = dataset_files_;

@ -235,6 +237,7 @@ Status TFRecordNode::to_json(nlohmann::json *out_json) {
// inherit this sampler from the leaf, providing sampling support from the caching layer.
// That is why we setup the sampler for a leaf node that does not use sampling.
Status TFRecordNode::SetupSamplerForCache(std::shared_ptr<SamplerObj> *sampler) {
  RETURN_UNEXPECTED_IF_NULL(sampler);
  bool shuffle_files = (shuffle_ == ShuffleMode::kGlobal || shuffle_ == ShuffleMode::kFiles);
  *sampler = SelectSampler(num_samples_, shuffle_files, num_shards_, shard_id_);
  return Status::OK();

@ -254,12 +257,16 @@ Status TFRecordNode::MakeSimpleProducer() {

// Visitor accepting method for IRNodePass
Status TFRecordNode::Accept(IRNodePass *p, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(p);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // Downcast shared pointer then call visitor
  return p->Visit(shared_from_base<TFRecordNode>(), modified);
}

// Visitor accepting method for IRNodePass
Status TFRecordNode::AcceptAfter(IRNodePass *const p, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(p);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // Downcast shared pointer then call visitor
  return p->VisitAfter(shared_from_base<TFRecordNode>(), modified);
}
@ -43,6 +43,7 @@ class JaggedConnector : public Connector<TensorRow> {
  }

  Status Pop(int32_t worker_id, TensorRow *result) noexcept override {
    RETURN_UNEXPECTED_IF_NULL(result);
    {
      MS_ASSERT(worker_id < num_consumers_);
      std::unique_lock<std::mutex> lock(m_);

@ -53,7 +54,7 @@ class JaggedConnector : public Connector<TensorRow> {
    }

    RETURN_IF_NOT_OK(queues_[pop_from_]->PopFront(result));
    if (result->eoe()) {
    if (result != nullptr && result->eoe()) {
      is_queue_finished_[pop_from_] = true;
    }
@ -32,12 +32,14 @@ namespace mindspore {
namespace dataset {

Status TensorOpFusionPass::Visit(std::shared_ptr<MapNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  std::vector<std::shared_ptr<TensorOperation>> ops = node->operations();

  // start temporary code, to deal with pre-built TensorOperation
  std::vector<std::string> pattern = {kDecodeOp, kRandomCropAndResizeOp};
  auto itr = std::search(ops.begin(), ops.end(), pattern.begin(), pattern.end(),
                         [](auto op, const std::string &nm) { return op->Name() == nm; });
                         [](auto op, const std::string &nm) { return op != nullptr ? op->Name() == nm : false; });
  if (itr != ops.end()) {
    MS_LOG(WARNING) << "Fusing pre-build Decode and RandomCropResize into one pre-build.";
    auto fused_op = dynamic_cast<RandomCropAndResizeOp *>((*(itr + 1))->Build().get());

@ -52,7 +54,7 @@ Status TensorOpFusionPass::Visit(std::shared_ptr<MapNode> node, bool *const modi
  // logic below is for non-prebuilt TensorOperation
  pattern = {vision::kDecodeOperation, vision::kRandomResizedCropOperation};
  itr = std::search(ops.begin(), ops.end(), pattern.begin(), pattern.end(),
                    [](auto op, const std::string &nm) { return op->Name() == nm; });
                    [](auto op, const std::string &nm) { return op != nullptr ? op->Name() == nm : false; });

  // return here if no pattern is found
  RETURN_OK_IF_TRUE(itr == ops.end());
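Reviewer note: the fusion pass now guards each operation pointer inside the std::search predicate, so a null op simply never matches. A standalone example of matching a consecutive name pattern over a vector of possibly-null shared_ptrs (the Op type is illustrative):

// Sketch: find a consecutive [Decode, RandomResizedCrop] pattern in an op
// list using std::search with a null-safe heterogeneous predicate.
#include <algorithm>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Op {
  std::string name;
  const std::string &Name() const { return name; }
};

int main() {
  std::vector<std::shared_ptr<Op>> ops = {
      std::make_shared<Op>(Op{"Rescale"}), nullptr,  // a null op must not be dereferenced
      std::make_shared<Op>(Op{"Decode"}), std::make_shared<Op>(Op{"RandomResizedCrop"})};
  std::vector<std::string> pattern = {"Decode", "RandomResizedCrop"};
  auto itr = std::search(ops.begin(), ops.end(), pattern.begin(), pattern.end(),
                         [](const std::shared_ptr<Op> &op, const std::string &nm) {
                           return op != nullptr ? op->Name() == nm : false;  // null ops never match
                         });
  std::cout << (itr != ops.end() ? "pattern found at index " + std::to_string(itr - ops.begin())
                                 : std::string("not found"))
            << '\n';  // pattern found at index 2
  return 0;
}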
@ -27,6 +27,8 @@ namespace dataset {

// this will become the RootNode:DatasetNode when it is turned on
Status AutoWorkerPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(root_ir);
  RETURN_UNEXPECTED_IF_NULL(modified);
  uint8_t config = GlobalContext::config_manager()->get_auto_worker_config();

  OpWeightPass pass(kOpWeightConfigs[config < kOpWeightConfigs.size() ? config : 0]);

@ -46,6 +48,8 @@ Status AutoWorkerPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *con
  // get the maximum weight of all the ops, this value is used to ensure the ratio of num_workers between ops
  float max_weight = 0;
  for (const auto &p : pass.weight_profile_) max_weight = std::max(max_weight, p.second);

  CHECK_FAIL_RETURN_UNEXPECTED(max_weight != 0, "Internal error, doesn't allow divide zero.");
  RETURN_IF_NOT_OK(pass.Run(root_ir, modified));
  constexpr size_t max_num_ops = 3;
  if (pass.parallel_ops_.size() > max_num_ops) {

@ -53,6 +57,7 @@ Status AutoWorkerPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *con
        << "1 batch and 1 map. AutoNumWorker may not be optimal for usage on complex pipelines.";
  }

  CHECK_FAIL_RETURN_UNEXPECTED(pass.weight_sum_ != 0, "Internal error, doesn't allow divide zero.");
  for (auto &p : pass.parallel_ops_) {
    // get the num worker via the weight ratio
    int32_t num_workers = std::ceil((thread_cnt_ * p.second) / (pass.weight_sum_ * num_shards));
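The divide-by-zero checks added here protect the worker-allocation formula the diff itself shows: num_workers = ceil(thread_cnt * weight / (weight_sum * num_shards)). A worked example with illustrative weights:

// Worked example of the AutoWorkerPass allocation formula shown above,
// assuming weight_sum has already been checked non-zero.
#include <cmath>
#include <iostream>

int main() {
  const float thread_cnt = 16.0f;
  const float weight_sum = 4.0f;               // must be non-zero before dividing
  const int num_shards = 1;
  const float weights[] = {2.0f, 1.0f, 1.0f};  // e.g. leaf, map, batch
  for (float w : weights) {
    int num_workers = static_cast<int>(std::ceil((thread_cnt * w) / (weight_sum * num_shards)));
    std::cout << num_workers << ' ';  // prints "8 4 4"
  }
  std::cout << '\n';
  return 0;
}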
@ -33,6 +33,8 @@ RepeatPass::RepeatPass()

// Identifies the subtree below this node as being in a repeated path of the tree.
Status RepeatPass::Visit(std::shared_ptr<RepeatNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // If this is an infinite repeat under infinite repeat/epoch, adjust current num_repeats_.
  // Otherwise, after multiplication it would become positive and this repeat wouldn't run infinitely.
  if (node->Count() == DatasetOp::kInfiniteRepeat && num_repeats_ < 0) {

@ -56,6 +58,8 @@ Status RepeatPass::Visit(std::shared_ptr<RepeatNode> node, bool *const modified)

// Identifies the subtree below this node as being in a repeated path of the tree.
Status RepeatPass::Visit(std::shared_ptr<EpochCtrlNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // Get the total number of epochs from the EpochCtrlOp parameter
  num_epochs_ = node->Count();
  // Every node below this EpochCtrlOp should be repeated for num_epochs_ times.

@ -69,6 +73,8 @@ Status RepeatPass::Visit(std::shared_ptr<EpochCtrlNode> node, bool *const modifi
#ifndef ENABLE_ANDROID
// Identifies the subtree below this node as being in a cache merge path
Status RepeatPass::Visit(std::shared_ptr<CacheMergeNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // Turn on the flag that we're under a merge op
  is_merge_ = true;
  return Status::OK();

@ -76,6 +82,8 @@ Status RepeatPass::Visit(std::shared_ptr<CacheMergeNode> node, bool *const modif

// Identifies the subtree below this node as being cached
Status RepeatPass::Visit(std::shared_ptr<CacheNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // Turn on the flag that we're under a merge op
  is_cached_ = true;
  return Status::OK();

@ -84,6 +92,8 @@ Status RepeatPass::Visit(std::shared_ptr<CacheNode> node, bool *const modified)

// Hooks up any identified eoe nodes under this repeat.
Status RepeatPass::VisitAfter(std::shared_ptr<RepeatNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // We are a repeat op in the descendant tree of a merge op, then we take the saved lookup up
  // and set its total repeats. It is important that the op is removed from the save area,
  // because the merge op above us may also take action on it later for a different case when

@ -103,12 +113,16 @@ Status RepeatPass::VisitAfter(std::shared_ptr<RepeatNode> node, bool *const modi
  // The total repeats of nodes above this Repeat(n) have nothing to do with this RepeatOp's parameter n.
  // But num_repeats_ has been multiplied by n during this Repeat(n)'s PreRunOnNode,
  // so we divide num_repeats_ by n to be able to correctly set total repeats for nodes above this RepeatOp.
  CHECK_FAIL_RETURN_UNEXPECTED(node->Count() != 0, "Invalid data, the number of node can't be 0.");
  num_repeats_ /= node->Count();
  return Status::OK();
}

// Hooks up any identified eoe nodes under this repeat.
Status RepeatPass::VisitAfter(std::shared_ptr<EpochCtrlNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  CHECK_FAIL_RETURN_UNEXPECTED(node->Count() != 0, "Invalid data, the number of node can't be 0.");
  node->SetTotalRepeats(num_repeats_);
  node->SetNumEpochs(num_epochs_);
  // We finish the walk of this EpochCtrl's descendent nodes.

@ -119,6 +133,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<EpochCtrlNode> node, bool *const m
// All operators have a flag that might be set related to the repeat and any leaf nodes need to be set up
// for use with a controlling repeat above it.
Status RepeatPass::VisitAfter(std::shared_ptr<DatasetNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // If we are under a cache op, then save ourselves to the cached op stack.
  if (is_cached_) {
    AddToCachedNodeStack(node);

@ -132,6 +148,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<DatasetNode> node, bool *const mod
#ifndef ENABLE_ANDROID
// CacheOp removes previous leaf ops and replaces them with itself
Status RepeatPass::VisitAfter(std::shared_ptr<CacheNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  is_cached_ = false;

  // if we are a cache within a repeat path of the tree, then adjust the total repeats and total epochs for cached ops.

@ -153,6 +171,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<CacheNode> node, bool *const modif

// Turns off the tracking for operations under merge op
Status RepeatPass::VisitAfter(std::shared_ptr<CacheMergeNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // If there was not any repeat in the merge cache miss leg, then the cache_lookup
  // would not have been consumed yet. In that case, we need to set its total repeats for it.
  if (cache_lookup_) {

@ -168,6 +188,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<CacheMergeNode> node, bool *const

// Saves the lookup up in case it needs to be referenced by a repeat
Status RepeatPass::VisitAfter(std::shared_ptr<CacheLookupNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  if (!node->IsLeaf()) {
    // By definition, the CacheLookup must be a leaf op. Make that clear here.
    RETURN_STATUS_UNEXPECTED("CacheLookupOp must be a leaf node!");

@ -185,6 +207,8 @@ Status RepeatPass::VisitAfter(std::shared_ptr<CacheLookupNode> node, bool *const
#endif

Status RepeatPass::VisitAfter(std::shared_ptr<TransferNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  // Set total repeats and total epochs for the TransferNode
  node->SetTotalRepeats(num_epochs_);
  node->SetNumEpochs(num_epochs_);

@ -192,7 +216,12 @@ Status RepeatPass::VisitAfter(std::shared_ptr<TransferNode> node, bool *const mo
}

// Adds an operator to the cached operator stack save area
void RepeatPass::AddToCachedNodeStack(const std::shared_ptr<DatasetNode> &node) { cached_node_stacks_.push(node); }
void RepeatPass::AddToCachedNodeStack(const std::shared_ptr<DatasetNode> &node) {
  if (node == nullptr) {
    return;
  }
  cached_node_stacks_.push(node);
}

// Pops an operator from the cached operator stack save area
std::shared_ptr<DatasetNode> RepeatPass::PopFromCachedNodeStack() {
@ -29,6 +29,10 @@ EpochCtrlPass::InjectionFinder::InjectionFinder(std::shared_ptr<DatasetNode> nod

// Performs finder work for BuildVocabOp that has special rules about epoch control injection
Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<RootNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  CHECK_FAIL_RETURN_UNEXPECTED(node->Children().size() > 0,
                               "Invalid data, the node of child should greater than zero.");
  // The injection is at the child of the root node
  injection_point_ = node->Children()[0];
  num_epochs_ = node->num_epochs();

@ -37,6 +41,8 @@ Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<RootNode> node, boo

// Performs finder work for BuildVocabOp that has special rules about epoch control injection
Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<BuildVocabNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  injection_point_ = nullptr;
  return Status::OK();
}

@ -44,12 +50,18 @@ Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<BuildVocabNode> nod
#ifndef ENABLE_ANDROID
// Performs finder work for BuildSentencePieceVocabNode that has special rules about epoch control injection
Status EpochCtrlPass::InjectionFinder::Visit(std::shared_ptr<BuildSentenceVocabNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  injection_point_ = nullptr;
  return Status::OK();
}
#endif

Status EpochCtrlPass::InjectionFinder::VisitAfter(std::shared_ptr<TransferNode> node, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(modified);
  CHECK_FAIL_RETURN_UNEXPECTED(node->Children().size() > 0,
                               "Invalid data, the node of child should greater than zero.");
  // Assumption: There is only one TransferNode in a pipeline. This assumption is not validated here.
  // Move the injection point to the child of this node.
  injection_point_ = node->Children()[0];

@ -61,6 +73,8 @@ EpochCtrlPass::EpochCtrlPass() {}

// Runs an injection pass to inject in operators needed at the pre pass stage
Status EpochCtrlPass::RunOnTree(std::shared_ptr<DatasetNode> root_ir, bool *const modified) {
  RETURN_UNEXPECTED_IF_NULL(root_ir);
  RETURN_UNEXPECTED_IF_NULL(modified);
  MS_LOG(INFO) << "Pre pass: Injection pass started.";

  // First, run the finder to perform any injection info before we can go ahead to drive the op injection work.
@ -53,8 +53,8 @@ json ConnectorSize::ParseOpInfo(const DatasetOp &node, const std::vector<int32_t

  auto children = node.Children();
  std::vector<int32_t> children_id;
  std::transform(children.begin(), children.end(), std::back_inserter(children_id),
                 [](std::shared_ptr<DatasetOp> op) -> int32_t { return op->id(); });
  (void)std::transform(children.begin(), children.end(), std::back_inserter(children_id),
                       [](const std::shared_ptr<DatasetOp> &op) -> int32_t { return op->id(); });
  if (!children_id.empty()) {
    json_node["children"] = children_id;
  }
@ -29,6 +29,9 @@ namespace dataset {

// temporary helper
int ConnectorThroughput::InitNodes() {
  if (tree_ == nullptr) {
    return 0;
  }
  auto it = (*tree_).begin();
  return it.NumNodes();
}

@ -43,15 +46,16 @@ Status ConnectorThroughput::Sample() {
    out_row_count_row[col] = cur_out_rows_count;
    auto sz = timestamps_.size();
    cur_time = std::chrono::steady_clock::now();
    double dt = 0;
    double data_time = 0;
    if (sz > 1) {
      auto _dt = std::chrono::duration_cast<std::chrono::microseconds>(timestamps_[0][sz - 1] - timestamps_[0][sz - 2]);
      dt = std::chrono::duration<double>(_dt).count();
      auto full_time =
        std::chrono::duration_cast<std::chrono::microseconds>(timestamps_[0][sz - 1] - timestamps_[0][sz - 2]);
      data_time = std::chrono::duration<double>(full_time).count();
    }
    auto prev_out_rows_count = out_row_count_table_[col][out_row_count_table_.size() - 1];
    if (dt != 0) {
    if (data_time != 0) {
      const int32_t multiplier = 1000;
      auto thr = (cur_out_rows_count - prev_out_rows_count) / (multiplier * dt);
      auto thr = (cur_out_rows_count - prev_out_rows_count) / (multiplier * data_time);
      throughput_row[col] = thr;
    } else {
      throughput_row[col] = 0;

@ -70,7 +74,7 @@ json ConnectorThroughput::ParseOpInfo(const DatasetOp &node, const std::vector<d
  auto children = node.Children();
  std::vector<int32_t> children_id;
  std::transform(children.begin(), children.end(), std::back_inserter(children_id),
                 [](std::shared_ptr<DatasetOp> op) -> int32_t { return op->id(); });
                 [](const std::shared_ptr<DatasetOp> &op) -> int32_t { return op ? op->id() : 0; });
  json json_node;
  json_node["op_id"] = node.id();
  json_node["op_type"] = node.Name();

@ -100,8 +104,10 @@ Status ConnectorThroughput::SaveToFile() {
  int col = 0;
  for (auto &node : *tree_) {
    std::vector<double> throughput;
    for (auto i = 0; i < throughput_.size(); i++) {
      throughput.push_back(throughput_[col][i]);
    if (throughput_.size() > col) {
      for (auto i = 0; i < throughput_[col].size(); i++) {
        throughput.push_back(throughput_[col][i]);
      }
    }

    if (!path.Exists()) {
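Reviewer note: the rename from dt to data_time does not change the throughput formula, which the diff shows in full: the row-count delta between the last two samples divided by the elapsed time scaled by 1000. A sketch of that arithmetic with steady_clock and illustrative row counts:

// Sketch of the ConnectorThroughput sampling arithmetic.
#include <chrono>
#include <iostream>
#include <thread>

int main() {
  using clock = std::chrono::steady_clock;
  auto t0 = clock::now();
  long long rows0 = 1000;
  std::this_thread::sleep_for(std::chrono::milliseconds(50));
  auto t1 = clock::now();
  long long rows1 = 6000;
  auto us = std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0);
  double data_time = std::chrono::duration<double>(us).count();  // elapsed time in seconds
  const int multiplier = 1000;
  // Guard against a zero time delta, exactly as the patched Sample() does.
  double throughput = (data_time != 0) ? (rows1 - rows0) / (multiplier * data_time) : 0.0;
  std::cout << "throughput: " << throughput << '\n';
  return 0;
}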
@ -18,9 +18,9 @@
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__)
#include <sys/syscall.h>
#endif
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <algorithm>
#include <fstream>
#include <memory>
#include <string>

@ -33,8 +33,8 @@
using json = nlohmann::json;
namespace mindspore {
namespace dataset {
bool BaseCpu::fetched_all_process_shared = false;
std::unordered_map<int32_t, std::vector<pid_t>> BaseCpu::op_process_shared = {};
bool BaseCpu::fetched_all_process_shared_ = false;
std::unordered_map<int32_t, std::vector<pid_t>> BaseCpu::op_process_shared_ = {};

#if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__)
#define USING_LINUX

@ -46,8 +46,8 @@ BaseCpu::BaseCpu() {
  pre_cpu_stat_.io_stat_ = 0;
  pre_cpu_stat_.idle_stat_ = 0;
  pre_cpu_stat_.total_stat_ = 0;
  fetched_all_process = false;
  pre_fetched_state = false;
  fetched_all_process_ = false;
  pre_fetched_state_ = false;
  cpu_processor_num_ = 0;
}

@ -157,6 +157,7 @@ Status DeviceCpu::Collect(const ExecutionTree *tree) {
  return Status::OK();
}
Status DeviceCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) {
  RETURN_UNEXPECTED_IF_NULL(name);
  name->clear();
  name->append("device_info");
  int total_samples = cpu_util_.size();

@ -221,6 +222,7 @@ Status DeviceCpu::SaveToFile(const std::string &file_path) {

Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id,
                                 std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> *op_stat) {
  RETURN_UNEXPECTED_IF_NULL(op_stat);
  pid_t pid = 0;
#if defined(USING_LINUX)
  pid = syscall(SYS_getpid);

@ -257,11 +259,12 @@ Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id,
}

Status OperatorCpu::Collect(const ExecutionTree *tree) {
  RETURN_UNEXPECTED_IF_NULL(tree);
  if (first_collect_) {
    for (auto iter = tree->begin(); iter != tree->end(); ++iter) {
      id_count_++;
      op_name[iter->id()] = iter->NameWithID();
      op_parallel_workers[iter->id()] = iter->num_workers();
      op_name_[iter->id()] = iter->NameWithID();
      op_parallel_workers_[iter->id()] = iter->num_workers();
    }
#if defined(USING_LINUX)
    cpu_processor_num_ = get_nprocs_conf();

@ -269,34 +272,34 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) {
  }

  // Obtain the op and thread mapping
  op_thread.clear();
  op_thread_.clear();
  List<Task> allTasks = tree->AllTasks()->GetTask();
  for (auto &task1 : allTasks) {
    int32_t op_id = task1.get_operator_id();
    op_thread[op_id].emplace_back(task1.get_linux_id());
    op_thread_[op_id].emplace_back(task1.get_linux_id());
  }

  // add process id into op_thread
  if (!fetched_all_process) {
  if (!fetched_all_process_) {
    {
      py::gil_scoped_acquire gil_acquire;
      py::module ds = py::module::import("mindspore.dataset.engine.datasets");
      py::tuple process_info = ds.attr("_get_operator_process")();
      py::dict sub_process = py::reinterpret_borrow<py::dict>(process_info[0]);
      fetched_all_process = py::reinterpret_borrow<py::bool_>(process_info[1]);
      fetched_all_process_ = py::reinterpret_borrow<py::bool_>(process_info[1]);
      // parse dict value
      op_process = toIntMap(sub_process);
      BaseCpu::op_process_shared = op_process;
      BaseCpu::fetched_all_process_shared = fetched_all_process;
      op_process_ = toIntMap(sub_process);
      BaseCpu::op_process_shared_ = op_process_;
      BaseCpu::fetched_all_process_shared_ = fetched_all_process_;
    }

    // judge whether there is device_que operator, if so operator id may need increase by one, temp use directly
    for (auto item : op_process) {
    for (auto item : op_process_) {
      if (!item.second.empty()) {
        if (op_thread.find(item.first) != op_thread.end()) {
          op_thread[item.first].insert(op_thread[item.first].end(), item.second.begin(), item.second.end());
        if (op_thread_.find(item.first) != op_thread_.end()) {
          op_thread_[item.first].insert(op_thread_[item.first].end(), item.second.begin(), item.second.end());
        } else {
          op_thread[item.first] = item.second;
          op_thread_[item.first] = item.second;
        }
      }
    }

@ -310,16 +313,15 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) {
  if (!first_collect_) {
    // obtain all the op id in current tasks
    std::vector<int32_t> total_op_id;
    for (auto iter = op_thread.begin(); iter != op_thread.end(); iter++) {
      total_op_id.emplace_back(iter->first);
    }
    (void)std::transform(op_thread_.begin(), op_thread_.end(), std::back_inserter(total_op_id),
                         [](const auto &iter) { return iter.first; });

    // iter all the op, and obtain the CPU utilization of each operator
    for (auto op_id = -1; op_id < id_count_; op_id++) {
      float user_util = 0, sys_util = 0;
      auto iter = std::find(total_op_id.begin(), total_op_id.end(), op_id);
      if (iter != total_op_id.end()) {
        for (auto thread_id : op_thread[op_id]) {
        for (auto thread_id : op_thread_[op_id]) {
          if (ParseCpuInfo(op_id, thread_id, &op_stat_) == Status::OK()) {
            user_util += (op_stat_[op_id][thread_id].user_stat_ - pre_op_stat_[op_id][thread_id].user_stat_) * 1.0 /
                         (total_stat_ - pre_total_stat_) * 100;

@ -329,7 +331,7 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) {
        }
      }
      CpuOpUtil info;
      info.op_id = op_id;
      info.op_id_ = op_id;
      info.sys_utilization_ = sys_util;
      info.user_utilization_ = user_util;
      cpu_step_util_.emplace_back(info);

@ -337,10 +339,10 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) {
    cpu_op_util_.emplace_back(cpu_step_util_);
  } else {
    // mainly obtain the init CPU execute time in first collect
    for (auto iter = op_thread.begin(); iter != op_thread.end(); iter++) {
      int32_t op_id = iter->first;
      for (auto thread_id_ : iter->second) {
        // ignore errors in the first collect
    for (const auto &iter : op_thread_) {
      int32_t op_id = iter.first;
      for (auto thread_id_ : iter.second) {
        // ParseCpuInfo may execute failed for cpu data not ready, but we still get next thread cpu info
        (void)ParseCpuInfo(op_id, thread_id_, &op_stat_);
      }
    }

@ -355,6 +357,8 @@ Status OperatorCpu::Collect(const ExecutionTree *tree) {
}

Status OperatorCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) {
  RETURN_UNEXPECTED_IF_NULL(name);
  RETURN_UNEXPECTED_IF_NULL(extra_message);
  int total_samples = cpu_op_util_.size();

  // Only analyze the middle half of the samples

@ -374,15 +378,15 @@ Status OperatorCpu::Analyze(std::string *name, double *utilization, std::string
      sum += cpu_op_util_[i][index].sys_utilization_;
    }
    if ((end_analyze - start_analyze) > 0) {
      op_util = 1.0 * sum * cpu_processor_num_ / (op_parallel_workers[op_id] * (end_analyze - start_analyze));
      op_util = 1.0 * sum * cpu_processor_num_ / (op_parallel_workers_[op_id] * (end_analyze - start_analyze));
    }
    if (op_util > *utilization) {
      *utilization = op_util;
      name->clear();
      name->append(op_name[op_id]);
      (void)name->append(op_name_[op_id]);
    }
    extra_message->append(op_name[op_id] + " utiliization per thread: " + std::to_string(op_util) + "% (" +
                          std::to_string(op_parallel_workers[op_id]) + " parallel_workers); ");
    (void)extra_message->append(op_name_[op_id] + " utilization per thread: " + std::to_string(op_util) + "% (" +
                                std::to_string(op_parallel_workers_[op_id]) + " parallel_workers); ");
  }
  return Status::OK();
}

@ -428,24 +432,24 @@ Status ProcessCpu::ParseCpuInfo() {
  uint64_t total_stat_;
  RETURN_IF_NOT_OK(GetTotalCpuTime(&total_stat_));

  if (!pre_fetched_state) {
    process_id.clear();
  if (!pre_fetched_state_) {
    process_id_.clear();
    pid_t main_pid = 0;
#if defined(USING_LINUX)
    main_pid = syscall(SYS_getpid);
#endif
    process_id.emplace_back(main_pid);
    op_process = BaseCpu::op_process_shared;
    fetched_all_process = BaseCpu::fetched_all_process_shared;
    for (auto item : op_process) {
      for (auto id : item.second) {
        process_id.emplace_back(id);
    process_id_.emplace_back(main_pid);
    op_process_ = BaseCpu::op_process_shared_;
    fetched_all_process_ = BaseCpu::fetched_all_process_shared_;
    for (const auto &item : op_process_) {
      for (const auto &id : item.second) {
        process_id_.emplace_back(id);
      }
    }
  }

  float user_util = 0, sys_util = 0;
  for (auto pid : process_id) {
  for (const auto &pid : process_id_) {
    std::string stat_path = "/proc/" + std::to_string(pid) + "/stat";

    std::ifstream file(stat_path);

@ -479,11 +483,12 @@ Status ProcessCpu::ParseCpuInfo() {
  }
  pre_total_stat_ = total_stat_;
  first_collect_ = false;
  pre_fetched_state = fetched_all_process;
  pre_fetched_state_ = fetched_all_process_;
  return Status::OK();
}

Status ProcessCpu::Collect(const ExecutionTree *tree) {
  RETURN_UNEXPECTED_IF_NULL(tree);
  if (first_collect_) {
#if defined(USING_LINUX)
    cpu_processor_num_ = get_nprocs_conf();

@ -495,6 +500,9 @@ Status ProcessCpu::Collect(const ExecutionTree *tree) {
}

Status ProcessCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) {
  RETURN_UNEXPECTED_IF_NULL(name);
  RETURN_UNEXPECTED_IF_NULL(utilization);
  RETURN_UNEXPECTED_IF_NULL(extra_message);
  name->clear();
  name->append("process_info");
  int total_samples = process_util_.size();
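Reviewer note: ProcessCpu::ParseCpuInfo reads per-process CPU times from procfs. A minimal sketch of pulling utime and stime (fields 14 and 15) out of /proc/self/stat; the real code walks the per-op pids collected above rather than "self":

// Sketch: read utime/stime for the current process from /proc/self/stat.
// Field 2 (comm) may contain spaces, so skip past the closing ')' first.
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>

int main() {
  std::ifstream file("/proc/self/stat");
  if (!file.is_open()) return 1;
  std::string line;
  std::getline(file, line);
  std::size_t rparen = line.rfind(')');
  if (rparen == std::string::npos) return 1;
  std::istringstream rest(line.substr(rparen + 2));  // fields from 3 (state) onward
  std::string field;
  unsigned long long utime = 0, stime = 0;
  for (int i = 3; i <= 15 && rest >> field; ++i) {
    if (i == 14) utime = std::stoull(field);  // user-mode jiffies
    if (i == 15) stime = std::stoull(field);  // kernel-mode jiffies
  }
  std::cout << "utime=" << utime << " stime=" << stime << " (clock ticks)\n";
  return 0;
}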
@ -49,7 +49,7 @@ typedef struct CpuInfo_s {
typedef struct CpuOpInfo_s {
  float user_utilization_;
  float sys_utilization_;
  int32_t op_id;
  int32_t op_id_;
} CpuOpUtil;

// CPU utilization of process

@ -78,11 +78,11 @@ class BaseCpu {
 protected:
  std::vector<CpuUtil> cpu_util_;
  CpuStat pre_cpu_stat_;
  static bool fetched_all_process_shared;
  static std::unordered_map<int32_t, std::vector<pid_t>> op_process_shared;
  bool fetched_all_process;
  bool pre_fetched_state;
  std::unordered_map<int32_t, std::vector<pid_t>> op_process;
  static bool fetched_all_process_shared_;
  static std::unordered_map<int32_t, std::vector<pid_t>> op_process_shared_;
  bool fetched_all_process_;
  bool pre_fetched_state_;
  std::unordered_map<int32_t, std::vector<pid_t>> op_process_;
  int32_t cpu_processor_num_;
};

@ -136,9 +136,9 @@ class OperatorCpu : public BaseCpu {
  bool first_collect_;

  // Store the id and its corresponding threads.
  std::unordered_map<int32_t, std::vector<pid_t>> op_thread;
  std::unordered_map<int32_t, std::string> op_name;
  std::unordered_map<int32_t, int32_t> op_parallel_workers;
  std::unordered_map<int32_t, std::vector<pid_t>> op_thread_;
  std::unordered_map<int32_t, std::string> op_name_;
  std::unordered_map<int32_t, int32_t> op_parallel_workers_;
  std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> pre_op_stat_;
  uint64_t pre_total_stat_;
  int32_t id_count_;

@ -161,7 +161,7 @@ class ProcessCpu : public BaseCpu {
  std::vector<CpuProcessUtil> process_util_;
  uint64_t pre_total_stat_;
  std::unordered_map<int64_t, CpuOpStat> pre_process_stat_;
  std::vector<pid_t> process_id;
  std::vector<pid_t> process_id_;
};

// Sampling CPU information
@ -52,7 +52,9 @@ class PerfData {
  void AddSample(const T &row) {
    auto i = 0;
    for (const auto &e : row) {
      data_[i++].push_back(e);
      if (data_.size() > i) {
        data_[i++].push_back(e);
      }
    }
    counter_++;
  }

@ -62,7 +64,9 @@ class PerfData {
  auto Row(dsize_t idx) {
    std::vector<V> row(n_cols_);
    for (auto i = 0; i < n_cols_; i++) {
      row[i] = data_[i][idx];
      if (data_.size() > i && data_[i].size() > idx) {
        row[i] = data_[i][idx];
      }
    }
    return row;
  }
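Reviewer note: the AddSample/Row guards above trade out-of-bounds writes for silently dropped cells. A tiny column-store sketch (types and names illustrative) showing the same bounds-checked access pattern:

// Sketch of a bounds-checked column store in the spirit of PerfData.
#include <cstddef>
#include <iostream>
#include <vector>

class ColumnStore {
 public:
  explicit ColumnStore(std::size_t n_cols) : data_(n_cols) {}
  void AddSample(const std::vector<double> &row) {
    std::size_t i = 0;
    for (double e : row) {
      if (data_.size() > i) data_[i++].push_back(e);  // extra columns are dropped silently
    }
  }
  std::vector<double> Row(std::size_t idx) const {
    std::vector<double> row(data_.size(), 0.0);
    for (std::size_t i = 0; i < data_.size(); i++) {
      if (data_[i].size() > idx) row[i] = data_[i][idx];  // missing cells stay 0
    }
    return row;
  }

 private:
  std::vector<std::vector<double>> data_;
};

int main() {
  ColumnStore store(2);
  store.AddSample({1.0, 2.0, 3.0});  // third value is dropped
  for (double v : store.Row(0)) std::cout << v << ' ';  // prints "1 2"
  std::cout << '\n';
  return 0;
}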
@ -51,6 +51,7 @@ Status Tracing::SaveToFile() {
}

Status Sampling::ReadJson(nlohmann::json *output) {
  RETURN_UNEXPECTED_IF_NULL(output);
  Path path = Path(file_path_);
  if (path.Exists()) {
    MS_LOG(DEBUG) << file_path_ << " exists";
@ -22,6 +22,8 @@ namespace mindspore {
namespace dataset {

Status Serdes::SaveToJSON(std::shared_ptr<DatasetNode> node, const std::string &filename, nlohmann::json *out_json) {
  RETURN_UNEXPECTED_IF_NULL(node);
  RETURN_UNEXPECTED_IF_NULL(out_json);
  // Dump attributes of current node to json string
  nlohmann::json args;
  RETURN_IF_NOT_OK(node->to_json(&args));
@ -48,6 +48,7 @@ TreeAdapter::TreeAdapter(UsageFlag usage) : usage_(usage), launched_(false), tre
}

Status TreeAdapter::PrePass(std::shared_ptr<DatasetNode> ir) {
  RETURN_UNEXPECTED_IF_NULL(ir);
  // Vector of actions in pre-pass phase
  std::vector<std::unique_ptr<IRPass>> actions;

@ -73,6 +74,7 @@ Status TreeAdapter::PrePass(std::shared_ptr<DatasetNode> ir) {
}

Status TreeAdapter::Optimize(std::shared_ptr<DatasetNode> ir) {
  RETURN_UNEXPECTED_IF_NULL(ir);
  // Vector of optimizations
  std::vector<std::unique_ptr<IRNodePass>> optimizations;
  MS_LOG(INFO) << "Running optimization pass loops";

@ -89,6 +91,7 @@ Status TreeAdapter::Optimize(std::shared_ptr<DatasetNode> ir) {
}

Status TreeAdapter::PostPass(std::shared_ptr<DatasetNode> ir) {
  RETURN_UNEXPECTED_IF_NULL(ir);
  // Vector of actions in post-pass phase
  std::vector<std::unique_ptr<IRPass>> actions;
  MS_LOG(INFO) << "Running post pass loops.";

@ -118,6 +121,9 @@ Status TreeAdapter::PostPass(std::shared_ptr<DatasetNode> ir) {
}

Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std::shared_ptr<DatasetOp> *const op) {
  RETURN_UNEXPECTED_IF_NULL(ir);
  RETURN_UNEXPECTED_IF_NULL(op);
  RETURN_UNEXPECTED_IF_NULL(tree_);
  // Build the DatasetOp ExecutionTree from the optimized IR tree
  std::vector<std::shared_ptr<DatasetOp>> ops;
  RETURN_IF_NOT_OK(ir->Build(&ops));

@ -133,7 +139,7 @@ Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std
  }

  // Build the children of IR, once they return, add the return value to *op
  for (std::shared_ptr<DatasetNode> child_ir : ir->Children()) {
  for (const std::shared_ptr<DatasetNode> &child_ir : ir->Children()) {
    std::shared_ptr<DatasetOp> child_op;
    RETURN_IF_NOT_OK(BuildExecutionTreeRecur(child_ir, &child_op));
    RETURN_IF_NOT_OK(ops.back()->AddChild(child_op));  // append children to the last of ops

@ -143,6 +149,7 @@ Status TreeAdapter::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std
}

Status TreeAdapter::Build(std::shared_ptr<DatasetNode> root_ir) {
  RETURN_UNEXPECTED_IF_NULL(root_ir);
  // This will evolve in the long run
  tree_ = std::make_unique<ExecutionTree>();
  // disable profiling if this is only a getter pass
@ -22,6 +22,8 @@ namespace dataset {
TreeAdapterLite::TreeAdapterLite() : root_(nullptr) { tree_ = std::make_unique<ExecutionTree>(); }

Status TreeAdapterLite::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir, std::shared_ptr<DatasetOp> *const op) {
  RETURN_UNEXPECTED_IF_NULL(ir);
  RETURN_UNEXPECTED_IF_NULL(op);
  // Build the DatasetOp ExecutionTree from the optimized IR tree
  std::vector<std::shared_ptr<DatasetOp>> ops;
  RETURN_IF_NOT_OK(ir->Build(&ops));

@ -41,7 +43,7 @@ Status TreeAdapterLite::BuildExecutionTreeRecur(std::shared_ptr<DatasetNode> ir,
  }

  // Build the children of IR, once they return, add the return value to *op
  for (std::shared_ptr<DatasetNode> child_ir : ir->Children()) {
  for (const std::shared_ptr<DatasetNode> &child_ir : ir->Children()) {
    std::shared_ptr<DatasetOp> child_op;
    RETURN_IF_NOT_OK(BuildExecutionTreeRecur(child_ir, &child_op));
    RETURN_IF_NOT_OK(ops.back()->AddChild(child_op));  // append children to the last of ops

@ -60,6 +62,7 @@ Status TreeAdapterLite::BuildTree(std::shared_ptr<DatasetNode> root_ir) {
Status TreeAdapterLite::GetNextRow(TensorRow *const row) {
  RETURN_UNEXPECTED_IF_NULL(root_);
  RETURN_IF_NOT_OK(root_->GetNextRowPullMode(row));
  RETURN_UNEXPECTED_IF_NULL(row);
  return Status::OK();
}
@@ -19,6 +19,7 @@
 #include <memory>
 #include <string>
+#include <utility>
 #include <vector>
 #include "minddata/dataset/core/device_tensor.h"
 #include "minddata/dataset/core/device_resource.h"

@@ -30,7 +31,8 @@ namespace mindspore {
 namespace dataset {
 class DvppNormalizeOp : public TensorOp {
  public:
-  explicit DvppNormalizeOp(std::vector<float> mean, std::vector<float> std) : mean_(mean), std_(std) {}
+  explicit DvppNormalizeOp(std::vector<float> mean, std::vector<float> std)
+      : mean_(std::move(mean)), std_(std::move(std)) {}

   ~DvppNormalizeOp() = default;

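Note: the constructor now takes the vectors by value and moves them into the members, so callers pay one copy for lvalue arguments and none for rvalues. A minimal sketch of the idiom (type name is illustrative):

#include <utility>
#include <vector>

class Normalize {
 public:
  // By-value parameter + std::move: one copy for lvalue args, zero for rvalues.
  explicit Normalize(std::vector<float> mean) : mean_(std::move(mean)) {}

 private:
  std::vector<float> mean_;
};

// Usage: Normalize op({0.485f, 0.456f, 0.406f});  // the temporary is moved, not copied
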
@@ -18,7 +18,7 @@
 #ifndef ENABLE_DVPP_INTERFACE
 #define ENABLE_DVPP_INTERFACE
 #endif
-#include <stdio.h>
+#include <cstdio>
 #include <iostream>
 #include <memory>
 #include <vector>

@@ -13,13 +13,14 @@
  * limitations under the License.
  */

+#include "minddata/dataset/kernels/image/dvpp/utils/MDAclProcess.h"
+
+#include <thread>
+#include <sys/stat.h>
+#include <sys/time.h>
 #include "minddata/dataset/include/dataset/constants.h"
 #include "minddata/dataset/core/tensor_shape.h"
 #include "minddata/dataset/kernels/image/image_utils.h"
-#include "MDAclProcess.h"
-#include <sys/time.h>
-#include <thread>
-#include <sys/stat.h>

 namespace {
 const int BUFFER_SIZE = 2048;

@@ -17,25 +17,25 @@
 #define MDACLMANAGER_H

+#include <climits>
+#include <string>
-#include <string.h>
+#include <cstdio>
 #include <map>
 #include <iostream>
 #include <memory>
-#include <unistd.h>
-#include <string>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include "acl/acl.h"
-#include "CommonDataType.h"

 #include "minddata/dataset/core/tensor_shape.h"
 #include "minddata/dataset/core/data_type.h"
+#include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h"
+#include "minddata/dataset/kernels/image/dvpp/utils/DvppCommon.h"
+#include "minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h"
 #include "mindspore/ccsrc/minddata/dataset/core/device_tensor.h"
 #include "mindspore/ccsrc/minddata/dataset/core/tensor.h"
 #include "mindspore/core/utils/log_adapter.h"
 #include "mindspore/ccsrc/minddata/dataset/util/status.h"
-#include "ErrorCode.h"
-#include "DvppCommon.h"
-#include <stdio.h>
-#include <unistd.h>
-#include <sys/stat.h>
-#include <sys/types.h>

 mode_t SetFileDefaultUmask();

@@ -16,17 +16,18 @@
 #ifndef RESOURCEMANAGER_H
 #define RESOURCEMANAGER_H

-#include <vector>
-#include <set>
 #include <climits>
 #include <cstring>
-#include <climits>
-#include <unordered_map>
 #include <mutex>
-#include "CommonDataType.h"
-#include "ErrorCode.h"
+#include <set>
+#include <sys/stat.h>
+#include <unordered_map>
+#include <vector>
 #include "mindspore/core/utils/log_adapter.h"
+#include "mindspore/ccsrc/cxx_api/graph/acl/acl_env_guard.h"
+#include "minddata/dataset/kernels/image/dvpp/utils/CommonDataType.h"
+#include "minddata/dataset/kernels/image/dvpp/utils/ErrorCode.h"

 enum ModelLoadMethod {
   LOAD_FROM_FILE = 0,  // Loading from file, memory of model and weights are managed by ACL

@@ -48,7 +48,7 @@ static void GetSobelKernel(float *kernel, int flag, int ksize, double scale) {
       buffer[0] = 1, buffer[1] = -2, buffer[2] = 1;
     }
   } else {
-    int old, now;
+    float old, now;
     buffer[0] = 1;
     for (int i = 0; i < ksize; i++) {
       buffer[i + 1] = 0;

@@ -571,9 +571,8 @@ bool ConvertTo(const LiteMat &src, LiteMat &dst, double scale) {

   if (dst.IsEmpty()) {
     dst.Init(src.width_, src.height_, src.channel_, LDataType::FLOAT32);
-  } else if (src.width_ != dst.width_ || src.height_ != dst.height_ || src.channel_ != dst.channel_) {
-    return false;
-  } else if (dst.data_type_ != LDataType::FLOAT32) {
+  } else if (src.width_ != dst.width_ || src.height_ != dst.height_ || src.channel_ != dst.channel_ ||
+             dst.data_type_ != LDataType::FLOAT32) {
     return false;
   }

@@ -662,24 +661,16 @@ bool Crop(const LiteMat &src, LiteMat &dst, int x, int y, int w, int h) {
 }

 static bool CheckZero(const std::vector<float> &vs) {
-  for (int i = 0; i < vs.size(); i++) {
-    if (Equal(vs[i], 0.0f)) {
-      return true;
-    }
-  }
-  return false;
+  return std::any_of(vs.begin(), vs.end(), [](const float &v) { return Equal(v, 0.0f); });
 }

 static bool CheckZero(const std::vector<size_t> &vs) {
-  for (int i = 0; i < vs.size(); i++) {
-    if (vs[i] == 0) return true;
-  }
-  return false;
+  return std::any_of(vs.begin(), vs.end(), [](const size_t &v) { return v == 0; });
 }

 static bool CheckMeanAndStd(const LiteMat &src, LiteMat &dst, int channel, const std::vector<float> &mean,
                             const std::vector<float> &std) {
-  if (mean.size() == 0 && std.size() == 0) {
+  if (mean.empty() && std.empty()) {
     return false;
   }
   if (src.data_type_ != LDataType::FLOAT32) {

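Note: std::any_of from <algorithm> replaces the hand-rolled early-return scans above; the size_t overload's lambda is corrected here to take size_t rather than float, matching the element type. A self-contained equivalent:

#include <algorithm>
#include <cstddef>
#include <vector>

// Returns true if any element is zero; same behavior as the loop it replaces.
static bool HasZero(const std::vector<size_t> &vs) {
  return std::any_of(vs.begin(), vs.end(), [](size_t v) { return v == 0; });
}
// Usage: HasZero({2, 0, 3}) returns true.
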
@@ -935,8 +926,8 @@ bool Merge(const std::vector<LiteMat> &mv, LiteMat &dst) {
   LDataType data_type = mv[0].data_type_;

   // The arrays in list must be single-channel
-  for (int i = 0; i < mv.size(); i++) {
-    if (mv[i].channel_ != 1) return false;
+  if (std::any_of(mv.begin(), mv.end(), [](const LiteMat &m) { return m.channel_ != 1; })) {
+    return false;
   }

   for (int i = 1; i < mv.size(); i++) {

@@ -998,7 +989,7 @@ bool Pad(const LiteMat &src, LiteMat &dst, int top, int bottom, int left, int ri
   return true;
 }

-std::vector<std::vector<float>> GetDefaultBoxes(BoxesConfig config) {
+std::vector<std::vector<float>> GetDefaultBoxes(const BoxesConfig config) {
   size_t size = config.num_default.size();
   if (size <= 1 || config.feature_size.size() != size || config.steps.size() != size ||
       config.aspect_rations.size() != size) {

@@ -1116,6 +1107,7 @@ std::vector<int> ApplyNms(const std::vector<std::vector<float>> &all_boxes, std:
     }
   }
   std::vector<int> new_order;
+  new_order.reserve(inds.size());
   for (int k = 0; k < inds.size(); k++) {
     new_order.push_back(order[inds[k]]);
   }

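Note: the added reserve() sizes the vector once up front, so the push_back loop performs a single allocation instead of repeated regrowth. An illustrative sketch:

#include <vector>

std::vector<int> Reorder(const std::vector<int> &order, const std::vector<int> &inds) {
  std::vector<int> result;
  result.reserve(inds.size());  // one allocation instead of O(log n) regrowths
  for (int idx : inds) {
    result.push_back(order[idx]);
  }
  return result;
}
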
@@ -283,9 +283,7 @@ void LiteMat::Release() {
   if (data_ptr_) {
     AlignFree(data_ptr_);
   }
-  if (ref_count_) {
-    delete[] ref_count_;
-  }
+  delete[] ref_count_;
 }
 data_ptr_ = nullptr;
 elem_size_ = 0;

@@ -293,7 +291,7 @@ void LiteMat::Release() {
   height_ = 0;
   channel_ = 0;
   c_step_ = 0;
-  ref_count_ = 0;
+  ref_count_ = nullptr;
   size_ = 0;
   setSteps(0, 0, 0);
 }

@@ -418,7 +416,7 @@ inline void SubtractImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *d
 }

 inline bool CheckSubstract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
-  if (dst == NULL) {
+  if (dst == nullptr) {
     return false;
   }

@@ -426,10 +424,7 @@ inline bool CheckSubstract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *
     return false;
   }

-  if (src_a.data_type_ != src_b.data_type_) {
-    return false;
-  }
-  return true;
+  return src_a.data_type_ == src_b.data_type_;
 }

 bool Subtract(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {

@@ -585,7 +580,7 @@ inline void DivideImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *dst
 }

 inline bool CheckDivide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
-  if (dst == NULL) {
+  if (dst == nullptr) {
     return false;
   }

@@ -593,10 +588,7 @@ inline bool CheckDivide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst
     return false;
   }

-  if (src_a.data_type_ != src_b.data_type_) {
-    return false;
-  }
-  return true;
+  return src_a.data_type_ == src_b.data_type_;
 }

 bool Divide(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {

@@ -693,7 +685,7 @@ inline void MultiplyImpl(const uint32_t *src0, const uint32_t *src1, uint32_t *d
 }

 inline bool CheckMultiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {
-  if (dst == NULL) {
+  if (dst == nullptr) {
     return false;
   }

@@ -701,10 +693,7 @@ inline bool CheckMultiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *d
     return false;
   }

-  if (src_a.data_type_ != src_b.data_type_) {
-    return false;
-  }
-  return true;
+  return src_a.data_type_ == src_b.data_type_;
 }

 bool Multiply(const LiteMat &src_a, const LiteMat &src_b, LiteMat *dst) {

@@ -166,15 +166,9 @@ class LDataType {
   ~LDataType() = default;

   inline Type Value() const { return type_; }
-  inline bool operator==(const LDataType &ps) const {
-    if (this->type_ == ps.type_) return true;
-    return false;
-  }
+  inline bool operator==(const LDataType &ps) const { return this->type_ == ps.type_; }

-  inline bool operator!=(const LDataType &ps) const {
-    if (this->type_ != ps.type_) return true;
-    return false;
-  }
+  inline bool operator!=(const LDataType &ps) const { return this->type_ != ps.type_; }

   uint8_t SizeInBytes() const {
     if (type_ < LDataType::NUM_OF_TYPES)

@@ -381,11 +381,9 @@ bool WarpAffineBilinear(const LiteMat &src, LiteMat &dst, const LiteMat &M, int
   }
   if (dst.IsEmpty()) {
     (void)dst.Init(dst_w, dst_h, src.channel_, LDataType::UINT8);
-  } else if (dst.height_ != dst_h || dst.width_ != dst_w || dst.channel_ != src.channel_) {
+  } else if (dst.height_ != dst_h || dst.width_ != dst_w || dst.channel_ != src.channel_ ||
+             dst.data_type_ != LDataType::UINT8) {
     return false;
-  } else if (dst.data_type_ != LDataType::UINT8) {
-    return false;
-  } else {
   }

   double IM[6];

@@ -182,6 +182,8 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
   } catch (std::runtime_error &e) {
     return DestroyDecompressAndReturnError(e.what());
   }
+  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_w) > crop_x, "invalid crop width");
+  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - crop_h) > crop_y, "invalid crop height");
   if (crop_x == 0 && crop_y == 0 && crop_w == 0 && crop_h == 0) {
     crop_w = cinfo.output_width;
     crop_h = cinfo.output_height;

@@ -190,6 +192,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
     return DestroyDecompressAndReturnError("Decode: invalid crop size");
   }
   const int mcu_size = cinfo.min_DCT_scaled_size;
+  CHECK_FAIL_RETURN_UNEXPECTED(mcu_size != 0, "Invalid data.");
   unsigned int crop_x_aligned = (crop_x / mcu_size) * mcu_size;
   unsigned int crop_w_aligned = crop_w + crop_x - crop_x_aligned;
   try {

@@ -206,8 +209,13 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
   RETURN_IF_NOT_OK(Tensor::CreateEmpty(ts, DataType(DataType::DE_UINT8), &output_tensor));
   const int buffer_size = output_tensor->SizeInBytes();
   JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>()));
-  // stride refers to output tensor, which has 3 components at most
+  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - skipped_scanlines) > crop_h,
+                               "Invalid crop height.");
   const int max_scanlines_to_read = skipped_scanlines + crop_h;
+  // stride refers to output tensor, which has 3 components at most
+  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() / crop_w) > kOutNumComponents,
+                               "Invalid crop width.");
   const int stride = crop_w * kOutNumComponents;
   // offset is calculated for scanlines read from the image, therefore
   // has the same number of components as the image

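Note: all of the added range checks use the same rearranged inequality: to confirm a + b fits in int32_t without the addition itself overflowing, compare against the maximum first. A standalone sketch of the pattern (function name is illustrative; the patch uses strict >, which is slightly more conservative):

#include <cstdint>
#include <limits>

// True when a + b cannot exceed int32_t's range (for non-negative a, b).
// Written as max - a >= b so the check itself never overflows.
bool AdditionFits(int32_t a, int32_t b) {
  return std::numeric_limits<int32_t>::max() - a >= b;
}
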
@@ -246,6 +254,8 @@ Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu
     RETURN_STATUS_UNEXPECTED("Crop: image datatype is not float32 or uint8");
   }

+  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - y) > h, "Invalid crop height.");
+  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - x) > w, "Invalid crop width.");
   // account for integer overflow
   if (y < 0 || (y + h) > input->shape()[0] || (y + h) < 0) {
     RETURN_STATUS_UNEXPECTED(

@@ -410,7 +420,10 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
 Status ResizePreserve(const TensorRow &inputs, int32_t height, int32_t width, int32_t img_orientation,
                       TensorRow *outputs) {
   outputs->resize(3);
+  CHECK_FAIL_RETURN_UNEXPECTED(inputs.size() > 0,
+                               "Invalid input, size should be greater than 0, but got " + std::to_string(inputs.size()));
   std::shared_ptr<Tensor> input = inputs[0];
+  CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 3, "Invalid input shape, should be at least 3 dimensions.");
   LiteMat lite_mat_src(input->shape()[1], input->shape()[0], input->shape()[2],
                        const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
                        GetLiteCVDataType(input->type()));

@@ -537,7 +550,15 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output

   std::shared_ptr<Tensor> output_tensor;

+  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.width_) > pad_left,
+                               "Invalid pad width.");
+  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.width_ - pad_left) > pad_right,
+                               "Invalid pad width.");
   int pad_width = lite_mat_rgb.width_ + pad_left + pad_right;
+  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.height_) > pad_top,
+                               "Invalid pad height.");
+  CHECK_FAIL_RETURN_UNEXPECTED((std::numeric_limits<int32_t>::max() - lite_mat_rgb.height_ - pad_top) > pad_bottom,
+                               "Invalid pad height.");
   int pad_height = lite_mat_rgb.height_ + pad_top + pad_bottom;
   TensorShape new_shape = TensorShape({pad_height, pad_width, input->shape()[2]});
   RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, input->type(), &output_tensor));

@@ -721,11 +742,13 @@ Status Affine(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
   }
   int height = 0;
   int width = 0;
+  CHECK_FAIL_RETURN_UNEXPECTED(mat.size() <= 6, "Invalid mat shape.");
   double M[6] = {};
   for (int i = 0; i < mat.size(); i++) {
     M[i] = static_cast<double>(mat[i]);
   }

+  CHECK_FAIL_RETURN_UNEXPECTED(input->shape().Size() >= 3, "Invalid input shape, should be at least 3 dimensions.");
   LiteMat lite_mat_rgb(input->shape()[1], input->shape()[0], input->shape()[2],
                        const_cast<void *>(reinterpret_cast<const void *>(input->GetBuffer())),
                        GetLiteCVDataType(input->type()));

@@ -22,7 +22,7 @@

 namespace mindspore {
 namespace dataset {
-const int32_t ResizePreserveAROp::kDefImgorientation = 0;
+const int32_t ResizePreserveAROp::kDefImgOrientation = 0;

 ResizePreserveAROp::ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation)
     : height_(height), width_(width), img_orientation_(img_orientation) {}

@@ -34,9 +34,9 @@ namespace dataset {
 class ResizePreserveAROp : public TensorOp {
  public:
   // Default values, also used by python_bindings.cc
-  static const int32_t kDefImgorientation;
+  static const int32_t kDefImgOrientation;

-  ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation = kDefImgorientation);
+  ResizePreserveAROp(int32_t height, int32_t width, int32_t img_orientation = kDefImgOrientation);

   ~ResizePreserveAROp() override = default;

@@ -35,9 +35,9 @@ Status ResizeWithBBoxOp::Compute(const TensorRow &input, TensorRow *output) {
   int32_t input_w = input[0]->shape()[1];

   output->resize(2);
-  (*output)[1] = std::move(input[1]);  // move boxes over to output
+  (*output)[1] = input[1];  // copy boxes over to output

-  std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input[0]));
+  std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(input[0]);

   RETURN_IF_NOT_OK(ResizeOp::Compute(std::static_pointer_cast<Tensor>(input_cv), &(*output)[0]));

@@ -29,7 +29,7 @@ namespace mindspore {
 namespace dataset {
 class RgbaToBgrOp : public TensorOp {
  public:
-  RgbaToBgrOp() {}
+  RgbaToBgrOp() = default;

   ~RgbaToBgrOp() override = default;

@@ -29,7 +29,7 @@ namespace mindspore {
 namespace dataset {
 class RgbaToRgbOp : public TensorOp {
  public:
-  RgbaToRgbOp() {}
+  RgbaToRgbOp() = default;

   ~RgbaToRgbOp() override = default;

@@ -42,9 +42,10 @@ Status SharpnessOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt
   /// 1, 5, 1,
   /// 1, 1, 1

-  float filterSum = 13.0;
+  const float filterMid = 5.0;
+  const float filterSum = 13.0;
   cv::Mat filter = cv::Mat(3, 3, CV_32F, cv::Scalar::all(1.0 / filterSum));
-  filter.at<float>(1, 1) = 5.0 / filterSum;
+  filter.at<float>(1, 1) = filterMid / filterSum;

   /// applying filter on channels
   cv::Mat result = cv::Mat();

@@ -57,7 +57,7 @@ Status SoftDvppDecodeRandomCropResizeJpegOp::Compute(const std::shared_ptr<Tenso
   SoftDpCropInfo crop_info;
   RETURN_IF_NOT_OK(GetCropInfo(input, &crop_info));
   try {
-    unsigned char *buffer = const_cast<unsigned char *>(input->GetBuffer());
+    auto buffer = const_cast<unsigned char *>(input->GetBuffer());
     CHECK_FAIL_RETURN_UNEXPECTED(buffer != nullptr,
                                  "SoftDvppDecodeRandomCropResizeJpeg: the input image buffer is empty.");
     SoftDpProcsessInfo info;

@@ -21,9 +21,9 @@
 #include <random>
 #include <string>

-#include "./utils/external_soft_dp.h"
 #include "minddata/dataset/core/tensor.h"
 #include "minddata/dataset/kernels/image/random_crop_and_resize_op.h"
+#include "minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h"
 #include "minddata/dataset/util/status.h"

 namespace mindspore {

@@ -32,7 +32,7 @@ class SoftDvppDecodeResizeJpegOp : public TensorOp {
     : target_height_(target_height), target_width_(target_width) {}

   /// \brief Destructor
-  ~SoftDvppDecodeResizeJpegOp() = default;
+  ~SoftDvppDecodeResizeJpegOp() override = default;

   Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;
   Status OutputShape(const std::vector<TensorShape> &inputs, std::vector<TensorShape> &outputs) override;

@@ -17,7 +17,7 @@
 #ifndef EXTERNAL_SOFTDP_H
 #define EXTERNAL_SOFTDP_H

-#include <stdint.h>
+#include <cstdint>

 struct SoftDpProcsessInfo {
   uint8_t *input_buffer;  // input buffer

@@ -44,11 +44,10 @@ uint32_t DecodeAndResizeJpeg(SoftDpProcsessInfo *soft_dp_process_info) {
   }

   // use vpc interface to resize and convert RGB, give user output buf and output size.
-  SoftDpCropInfo crop;
-  crop.left = 0;
-  crop.right = vpc_input_info.real_width - 1;
-  crop.up = 0;
-  crop.down = vpc_input_info.real_height - 1;
+  auto crop = SoftDpCropInfo{.left = 0,
+                             .right = static_cast<uint32_t>(vpc_input_info.real_width - 1),
+                             .up = 0,
+                             .down = static_cast<uint32_t>(vpc_input_info.real_height - 1)};

   VpcInfo output;
   output.addr = soft_dp_process_info->output_buffer;

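Note: the replacement uses designated initializers, which name each field at the initialization site (standard since C++20, and accepted as an extension by GCC/Clang before that). An illustrative sketch with a stand-in struct:

#include <cstdint>

struct CropRect {  // illustrative stand-in for SoftDpCropInfo
  uint32_t left;
  uint32_t right;
  uint32_t up;
  uint32_t down;
};

// Every field is named at the call site, so reordering or adding fields
// cannot silently misassign values the way positional init can.
CropRect MakeFullCrop(uint32_t width, uint32_t height) {
  return CropRect{.left = 0, .right = width - 1, .up = 0, .down = height - 1};
}
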
@@ -17,8 +17,8 @@
 #ifndef SOFT_DP_H
 #define SOFT_DP_H

-#include <stdint.h>
-#include "./external_soft_dp.h"
+#include <cstdint>
+#include "minddata/dataset/kernels/image/soft_dvpp/utils/external_soft_dp.h"

 enum JpegdToVpcFormat {
   INPUT_VPC_UNKNOWN = -1,

@@ -25,11 +25,10 @@
 #define DP_EVENT 0x10000
 #define DP_DEBUG_LEVEL (DP_EVENT | DP_ERR | DP_WARNING | DP_INFO | DP_DEBUG)

-#include <vector>
-#include <string>

 #if defined(DVPP_UTST) || defined(DEBUG)
-#include <stdio.h>
+#include <string>
+#include <vector>

 #define DP_LOG(model, level, format, ...) \
   do { \

@@ -67,6 +66,8 @@

 #include <securec.h>
+#include <cstdio>
+#include <vector>
 #include <string>
 #include "glog/logging.h"

 template <typename... Args>

@@ -48,9 +48,5 @@ bool IsDirectory(const std::string &path) {
     return false;
   }

-  if (S_ISDIR(buf.st_mode)) {
-    return true;
-  } else {
-    return false;
-  }
+  return S_ISDIR(buf.st_mode);
 }

@@ -40,11 +40,7 @@ T1 AlignDown(T1 num, T2 align) {

 template <typename T>
 bool IsInTheScope(T num, T left_point, T right_point) {
-  if (num >= left_point && num <= right_point) {
-    return true;
-  }
-
-  return false;
+  return num >= left_point && num <= right_point;
 }

 template <typename T>

@@ -109,19 +109,19 @@ int32_t SoftVpc::CheckParamter() {

   uint32_t out_width = out_width_;
   uint32_t out_height = out_height_;
-  bool flag = (out_width * 32 >= crop_width) ? true : false;  // A maximum of 32x zoom-out
+  bool flag = (out_width * 32 >= crop_width);  // A maximum of 32x zoom-out
   VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail,
                                    "Max reduction multiple is 32. Please check left(%u), right(%u), out_width(%u).",
                                    left_, right_, out_width);
-  flag = (crop_width * 16 >= out_width) ? true : false;  // Up to 16x magnification
+  flag = (crop_width * 16 >= out_width);  // Up to 16x magnification
   VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail,
                                    "Max magnification is 16. Please check left(%u), right(%u), out_width(%u).", left_,
                                    right_, out_width);
-  flag = (out_height * 32 >= crop_height) ? true : false;  // A maximum of 32x zoom-out
+  flag = (out_height * 32 >= crop_height);  // A maximum of 32x zoom-out
   VPC_CHECK_COND_FAIL_PRINT_RETURN(flag, dpFail,
                                    "Max reduction multiple is 32. Please check up(%u), down(%u), out_height(%u).", up_,
                                    down_, out_height);
-  flag = (crop_height * 16 >= out_height) ? true : false;  // Up to 16x magnification
+  flag = (crop_height * 16 >= out_height);  // Up to 16x magnification
   VPC_CHECK_COND_FAIL_PRINT_RETURN(
       flag, dpFail, "Max magnification is 16. Please check up(%u), down(%u), out_height(%u).", up_, down_, out_height);
   return dpSucc;

@@ -34,7 +34,7 @@ class SoftVpc {
  public:
   SoftVpc();

-  ~SoftVpc() {}
+  ~SoftVpc() = default;

   /*
    * @brief : vpc Cropping and Scaling APIs.

@@ -75,7 +75,7 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW

   // taps_4, the second character in the square brackets is the start address of the array block.
   if ((*flag_ctl - initBracketNum) % arrTypeNum == 2) {
-    while (1) {
+    while (true) {
       ss >> yuv_scaler_paraset->scale[cnt].taps_4[index->first_index++];
       if (ss.fail()) {  // read failed.
         index->first_index = index->first_index - 1;

@@ -94,7 +94,7 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW

   // taps_6
   if ((*flag_ctl - initBracketNum) % arrTypeNum == 0) {
-    while (1) {
+    while (true) {
       ss >> yuv_scaler_paraset->scale[cnt].taps_6[index->second_index++];
       if (ss.fail()) {  // read failed.
         index->second_index = index->second_index - 1;

@@ -115,7 +115,6 @@ void GetParaSet(std::string str_line, int32_t *flag_ctl, int32_t *flag_tap, YuvW
 }

 int32_t CheckParamater(std::pair<bool, std::string> rlt, uint32_t i) {
-  int32_t ret = dpSucc;
   if (rlt.first == false) {
     API_LOGE("Get real path failed. index = %u", i);
     return dpFail;

@@ -126,7 +125,7 @@ int32_t CheckParamater(std::pair<bool, std::string> rlt, uint32_t i) {
     return dpFail;
   }

-  return ret;
+  return dpSucc;
 }

 // Read the parameter set file and skip the comments in the file.

@@ -177,7 +176,7 @@ int32_t ParseFileToVar(const std::string *para_set_name, uint32_t yuv_scaler_par
   }

   // count the number of "{", check the location of the data.
-  if (str_line.find("{") != std::string::npos) {
+  if (str_line.find('{') != std::string::npos) {
     flag_ctl++;
     flag_tap = 1;
   }

@@ -19,6 +19,7 @@

 #include <memory>
 #include <string>
+#include <utility>
 #include <vector>

 #include "minddata/dataset/core/tensor.h"

@@ -29,9 +30,9 @@ namespace mindspore {
 namespace dataset {
 class SolarizeOp : public TensorOp {
  public:
-  explicit SolarizeOp(std::vector<uint8_t> threshold = {0, 255}) : threshold_(threshold) {}
+  explicit SolarizeOp(std::vector<uint8_t> threshold = {0, 255}) : threshold_(std::move(threshold)) {}

-  ~SolarizeOp() = default;
+  ~SolarizeOp() override = default;

   Status Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) override;

@@ -30,7 +30,7 @@ namespace dataset {
 class SwapRedBlueOp : public TensorOp {
  public:
   /// \brief Constructor
-  SwapRedBlueOp() {}
+  SwapRedBlueOp() = default;

   SwapRedBlueOp(const SwapRedBlueOp &rhs) = default;

@@ -22,7 +22,7 @@ namespace dataset {
 const int UniformAugOp::kDefNumOps = 2;

 UniformAugOp::UniformAugOp(std::vector<std::shared_ptr<TensorOp>> op_list, int32_t num_ops)
-    : tensor_op_list_(op_list), num_ops_(num_ops) {
+    : tensor_op_list_(std::move(op_list)), num_ops_(num_ops) {
   rnd_.seed(GetSeed());
 }

@@ -70,7 +70,7 @@ Status ComposeOperation::ValidateParams() {
 std::shared_ptr<TensorOp> ComposeOperation::Build() {
   std::vector<std::shared_ptr<TensorOp>> tensor_ops;
   (void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops),
-                       [](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
+                       [](const auto &op) -> std::shared_ptr<TensorOp> { return op->Build(); });
   return std::make_shared<ComposeOp>(tensor_ops);
 }

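Note: std::transform with std::back_inserter maps one container into another without pre-sizing it, and taking the lambda parameter as const auto & avoids copying each shared_ptr. A self-contained illustration (names are illustrative):

#include <algorithm>
#include <iterator>
#include <memory>
#include <vector>

// Maps each element of src to its size; back_inserter grows the output as needed.
std::vector<int> Sizes(const std::vector<std::shared_ptr<std::vector<int>>> &src) {
  std::vector<int> sizes;
  std::transform(src.begin(), src.end(), std::back_inserter(sizes),
                 [](const auto &v) { return static_cast<int>(v->size()); });
  return sizes;
}
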
@@ -184,7 +184,7 @@ std::shared_ptr<TensorOp> PadEndOperation::Build() { return std::make_shared<Pad
 #endif

 // PreBuiltOperation
-PreBuiltOperation::PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op) : op_(tensor_op) {
+PreBuiltOperation::PreBuiltOperation(std::shared_ptr<TensorOp> tensor_op) : op_(std::move(tensor_op)) {
 #ifdef ENABLE_PYTHON
   auto pyfunc_tensor_op = std::dynamic_pointer_cast<PyFuncOp>(op_);
   if (pyfunc_tensor_op && pyfunc_tensor_op->IsRandom()) random_op_ = true;

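Note: with the by-value-plus-move constructor, tensor_op is an empty shared_ptr once the member initializer has run, so the body must inspect the member op_ rather than the moved-from parameter (the cast line above is adjusted accordingly). A minimal illustration of the pitfall:

#include <memory>
#include <utility>

struct Base { virtual ~Base() = default; };
struct Derived : Base {};

class Holder {
 public:
  explicit Holder(std::shared_ptr<Base> p) : p_(std::move(p)) {
    // p is now null; inspect the member instead of the moved-from parameter.
    is_derived_ = std::dynamic_pointer_cast<Derived>(p_) != nullptr;
  }

 private:
  std::shared_ptr<Base> p_;
  bool is_derived_ = false;
};
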
@@ -231,7 +231,7 @@ Status RandomChoiceOperation::ValidateParams() {
 std::shared_ptr<TensorOp> RandomChoiceOperation::Build() {
   std::vector<std::shared_ptr<TensorOp>> tensor_ops;
   (void)std::transform(transforms_.begin(), transforms_.end(), std::back_inserter(tensor_ops),
-                       [](std::shared_ptr<TensorOperation> op) -> std::shared_ptr<TensorOp> { return op->Build(); });
+                       [](const auto &op) -> std::shared_ptr<TensorOp> { return op->Build(); });
   return std::make_shared<RandomChoiceOp>(tensor_ops);
 }