forked from mindspore-Ecosystem/mindspore
!1192 Support string type Stage !
Merge pull request !1192 from h.farahat/string_Tensor
This commit is contained in:
commit
051567ba20
|
@ -237,6 +237,11 @@ void bindTensor(py::module *m) {
|
|||
.def("type", &Tensor::type)
|
||||
.def("as_array", [](py::object &t) {
|
||||
auto &tensor = py::cast<Tensor &>(t);
|
||||
if (tensor.type() == DataType::DE_STRING) {
|
||||
py::array res;
|
||||
tensor.GetDataAsNumpyStrings(&res);
|
||||
return res;
|
||||
}
|
||||
py::buffer_info info;
|
||||
THROW_IF_ERROR(Tensor::GetBufferInfo(tensor, &info));
|
||||
return py::array(pybind11::dtype(info), info.shape, info.strides, info.ptr, t);
|
||||
|
|
|
@ -24,15 +24,15 @@
|
|||
namespace mindspore {
|
||||
namespace dataset {
|
||||
// Constructs the base Tensor from `shape` and `type`, then runs MatInit once so that mat_
// is set up over the tensor's mutable buffer.
CVTensor::CVTensor(const TensorShape &shape, const DataType &type) : Tensor(shape, type) {
  // A constructor cannot hand back a Status, so MatInit's result is explicitly discarded.
  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
}
|
||||
|
||||
// Constructs the base Tensor from `shape`, `type` and the input bytes `data`, then runs MatInit
// once so that mat_ is set up over the tensor's mutable buffer.
CVTensor::CVTensor(const TensorShape &shape, const DataType &type, const uchar *data) : Tensor(shape, type, data) {
  // A constructor cannot hand back a Status, so MatInit's result is explicitly discarded.
  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
}
|
||||
|
||||
// Move-constructs the base Tensor out of `*tensor`, then runs MatInit once so that mat_ is set up
// over the buffer now held by this object.
CVTensor::CVTensor(std::shared_ptr<Tensor> tensor) : Tensor(std::move(*tensor)) {
  // A constructor cannot hand back a Status, so MatInit's result is explicitly discarded.
  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
}
|
||||
|
||||
std::pair<std::array<int, 2>, int> CVTensor::IsValidImage(const TensorShape &shape, const DataType &type) {
|
||||
|
@ -83,19 +83,19 @@ Status CVTensor::MatInit(uchar *data, const TensorShape &shape, const DataType &
|
|||
|
||||
// Reshapes the base Tensor to `shape`, then re-runs MatInit so that mat_ reflects the new shape_.
// @param shape the requested shape
// @return Status Code - the first failing step's Status, otherwise OK
Status CVTensor::Reshape(const TensorShape &shape) {
  RETURN_IF_NOT_OK(Tensor::Reshape(shape));
  RETURN_IF_NOT_OK(this->MatInit(GetMutableBuffer(), shape_, type_, &mat_));
  return Status::OK();
}
|
||||
|
||||
// Expands the base Tensor along `axis`, then re-runs MatInit so that mat_ reflects the new shape_.
// @param axis the axis passed through to Tensor::ExpandDim
// @return Status Code - the first failing step's Status, otherwise OK
Status CVTensor::ExpandDim(const dsize_t &axis) {
  RETURN_IF_NOT_OK(Tensor::ExpandDim(axis));
  RETURN_IF_NOT_OK(this->MatInit(GetMutableBuffer(), shape_, type_, &mat_));
  return Status::OK();
}
|
||||
|
||||
// Squeezes the base Tensor, then re-runs MatInit so that mat_ reflects the new shape_.
void CVTensor::Squeeze() {
  Tensor::Squeeze();
  // This function returns void, so MatInit's Status is explicitly discarded.
  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -15,116 +15,40 @@
|
|||
*/
|
||||
#include "dataset/core/data_type.h"
|
||||
|
||||
#include <opencv2/core/hal/interface.h>
|
||||
|
||||
#include "utils/log_adapter.h"
|
||||
|
||||
#include "dataset/core/constants.h"
|
||||
#include "dataset/core/pybind_support.h"
|
||||
#include "dataset/util/de_error.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
uint8_t DataType::SizeInBytes() const {
|
||||
switch (type_) {
|
||||
case DataType::DE_BOOL:
|
||||
case DataType::DE_INT8:
|
||||
case DataType::DE_UINT8:
|
||||
return 1;
|
||||
case DataType::DE_INT16:
|
||||
case DataType::DE_UINT16:
|
||||
case DataType::DE_FLOAT16:
|
||||
return 2;
|
||||
case DataType::DE_INT32:
|
||||
case DataType::DE_UINT32:
|
||||
case DataType::DE_FLOAT32:
|
||||
return 4;
|
||||
case DataType::DE_INT64:
|
||||
case DataType::DE_UINT64:
|
||||
case DataType::DE_FLOAT64:
|
||||
return 8;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
if (type_ < DataType::NUM_OF_TYPES)
|
||||
return SIZE_IN_BYTES[type_];
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Converts this type to a numpy dtype by constructing py::dtype from the PYBIND_TYPES entry.
// @return py::dtype built from PYBIND_TYPES[type_] when type_ is inside the table
py::dtype DataType::AsNumpyType() const {
  if (type_ < DataType::NUM_OF_TYPES) {
    return py::dtype(PYBIND_TYPES[type_]);
  }
  // NOTE(review): "unknown" is kept from the previous implementation; it does not look like a
  // dtype name numpy accepts, so this path presumably raises - confirm the intended fallback.
  return py::dtype("unknown");
}
|
||||
|
||||
uint8_t DataType::AsCVType() const {
|
||||
switch (type_) {
|
||||
case DataType::DE_BOOL:
|
||||
return CV_8U;
|
||||
case DataType::DE_INT8:
|
||||
return CV_8S;
|
||||
case DataType::DE_UINT8:
|
||||
return CV_8U;
|
||||
case DataType::DE_INT16:
|
||||
return CV_16S;
|
||||
case DataType::DE_UINT16:
|
||||
return CV_16U;
|
||||
case DataType::DE_INT32:
|
||||
return CV_32S;
|
||||
case DataType::DE_FLOAT16:
|
||||
return CV_16F;
|
||||
case DataType::DE_FLOAT32:
|
||||
return CV_32F;
|
||||
case DataType::DE_FLOAT64:
|
||||
return CV_64F;
|
||||
case DataType::DE_UINT32:
|
||||
case DataType::DE_INT64:
|
||||
case DataType::DE_UINT64:
|
||||
default:
|
||||
MS_LOG(ERROR) << "Cannot convert to OpenCV type. Return invalid type!";
|
||||
return kCVInvalidType;
|
||||
uint8_t res = kCVInvalidType;
|
||||
if (type_ < DataType::NUM_OF_TYPES) {
|
||||
res = CV_TYPES[type_];
|
||||
}
|
||||
}
|
||||
|
||||
if (res == kCVInvalidType) {
|
||||
MS_LOG(ERROR) << "Cannot convert to OpenCV type. Return invalid type!";
|
||||
}
|
||||
|
||||
return res;
|
||||
} // namespace dataset
|
||||
|
||||
DataType DataType::FromCVType(int cv_type) {
|
||||
auto depth = static_cast<uchar>(cv_type) & static_cast<uchar>(CV_MAT_DEPTH_MASK);
|
||||
|
@ -176,72 +100,17 @@ DataType::DataType(const std::string &type_str) {
|
|||
type_ = DE_FLOAT32;
|
||||
else if (type_str == "float64")
|
||||
type_ = DE_FLOAT64;
|
||||
else if (type_str == "string")
|
||||
type_ = DE_STRING;
|
||||
else
|
||||
type_ = DE_UNKNOWN;
|
||||
}
|
||||
|
||||
std::string DataType::ToString() const {
|
||||
switch (type_) {
|
||||
case DataType::DE_BOOL:
|
||||
return "bool";
|
||||
case DataType::DE_INT8:
|
||||
return "int8";
|
||||
case DataType::DE_UINT8:
|
||||
return "uint8";
|
||||
case DataType::DE_INT16:
|
||||
return "int16";
|
||||
case DataType::DE_UINT16:
|
||||
return "uint16";
|
||||
case DataType::DE_INT32:
|
||||
return "int32";
|
||||
case DataType::DE_UINT32:
|
||||
return "uint32";
|
||||
case DataType::DE_INT64:
|
||||
return "int64";
|
||||
case DataType::DE_UINT64:
|
||||
return "uint64";
|
||||
case DataType::DE_FLOAT16:
|
||||
return "float16";
|
||||
case DataType::DE_FLOAT32:
|
||||
return "float32";
|
||||
case DataType::DE_FLOAT64:
|
||||
return "float64";
|
||||
case DataType::DE_UNKNOWN:
|
||||
return "unknown";
|
||||
default:
|
||||
return "unknown";
|
||||
}
|
||||
}
|
||||
|
||||
// Maps a numpy dtype to the matching DataType.
// Fixed-size dtypes are matched with py::object::is() against py::dtype(<name>), exactly as before.
// A dtype whose kind() is 'S' is mapped to DE_STRING, the same rule FromNpArray applies.
// @param type numpy dtype to convert
// @return the matching DataType, or DE_UNKNOWN (after logging an error) when nothing matches
DataType DataType::FromNpType(const py::dtype &type) {
  struct NumpyName {
    const char *name;
    DataType::Type de_type;
  };
  static constexpr NumpyName kNumpyNames[] = {
    {"bool", DataType::DE_BOOL},       {"int8", DataType::DE_INT8},       {"uint8", DataType::DE_UINT8},
    {"int16", DataType::DE_INT16},     {"uint16", DataType::DE_UINT16},   {"int32", DataType::DE_INT32},
    {"uint32", DataType::DE_UINT32},   {"int64", DataType::DE_INT64},     {"uint64", DataType::DE_UINT64},
    {"float16", DataType::DE_FLOAT16}, {"float32", DataType::DE_FLOAT32}, {"double", DataType::DE_FLOAT64}};

  for (const auto &candidate : kNumpyNames) {
    if (type.is(py::dtype(candidate.name))) {
      return DataType(candidate.de_type);
    }
  }
  if (type.kind() == 'S') {
    return DataType(DataType::DE_STRING);
  }
  MS_LOG(ERROR) << "Cannot convert from numpy type. Unknown data type is returned!";
  return DataType(DataType::DE_UNKNOWN);
}
|
||||
|
||||
DataType DataType::FromNpArray(const py::array &arr) {
|
||||
|
@ -269,6 +138,8 @@ DataType DataType::FromNpArray(const py::array &arr) {
|
|||
return DataType(DataType::DE_FLOAT32);
|
||||
} else if (py::isinstance<py::array_t<std::double_t>>(arr)) {
|
||||
return DataType(DataType::DE_FLOAT64);
|
||||
} else if (arr.dtype().kind() == 'S') {
|
||||
return DataType(DataType::DE_STRING);
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Cannot convert from numpy type. Unknown data type is returned!";
|
||||
return DataType(DataType::DE_UNKNOWN);
|
||||
|
@ -276,36 +147,16 @@ DataType DataType::FromNpArray(const py::array &arr) {
|
|||
}
|
||||
|
||||
std::string DataType::GetPybindFormat() const {
|
||||
switch (type_) {
|
||||
case DataType::DE_BOOL:
|
||||
return py::format_descriptor<bool>::format();
|
||||
case DataType::DE_INT8:
|
||||
return py::format_descriptor<int8_t>::format();
|
||||
case DataType::DE_UINT8:
|
||||
return py::format_descriptor<uint8_t>::format();
|
||||
case DataType::DE_INT16:
|
||||
return py::format_descriptor<int16_t>::format();
|
||||
case DataType::DE_UINT16:
|
||||
return py::format_descriptor<uint16_t>::format();
|
||||
case DataType::DE_INT32:
|
||||
return py::format_descriptor<int32_t>::format();
|
||||
case DataType::DE_UINT32:
|
||||
return py::format_descriptor<uint32_t>::format();
|
||||
case DataType::DE_INT64:
|
||||
return py::format_descriptor<int64_t>::format();
|
||||
case DataType::DE_UINT64:
|
||||
return py::format_descriptor<uint64_t>::format();
|
||||
case DataType::DE_FLOAT16:
|
||||
// Eigen 3.3.7 doesn't support py::format_descriptor<Eigen::half>::format()
|
||||
return "e";
|
||||
case DataType::DE_FLOAT32:
|
||||
return py::format_descriptor<float>::format();
|
||||
case DataType::DE_FLOAT64:
|
||||
return py::format_descriptor<double>::format();
|
||||
default:
|
||||
MS_LOG(ERROR) << "Cannot convert from data type to pybind format descriptor!";
|
||||
return "";
|
||||
std::string res;
|
||||
if (type_ < DataType::NUM_OF_TYPES) {
|
||||
res = PYBIND_FORMAT_DESCRIPTOR[type_];
|
||||
}
|
||||
|
||||
if (res.empty()) {
|
||||
MS_LOG(ERROR) << "Cannot convert from data type to pybind format descriptor!";
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -16,18 +16,25 @@
|
|||
#ifndef DATASET_CORE_DATA_TYPE_H_
|
||||
#define DATASET_CORE_DATA_TYPE_H_
|
||||
|
||||
#include <opencv2/core/hal/interface.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "pybind11/numpy.h"
|
||||
#include "pybind11/pybind11.h"
|
||||
|
||||
#include "dataset/core/constants.h"
|
||||
#include "dataset/core/pybind_support.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
// Class that represents basic data types in DataEngine.
|
||||
class DataType {
|
||||
public:
|
||||
enum Type : uint8_t {
|
||||
DE_UNKNOWN = 0,
|
||||
DE_BOOL,
|
||||
DE_INT8,
|
||||
DE_UINT8,
|
||||
|
@ -40,20 +47,60 @@ class DataType {
|
|||
DE_FLOAT16,
|
||||
DE_FLOAT32,
|
||||
DE_FLOAT64,
|
||||
DE_UNKNOWN
|
||||
DE_STRING,
|
||||
NUM_OF_TYPES
|
||||
};
|
||||
|
||||
static constexpr uint8_t DE_BOOL_SIZE = 1;
|
||||
static constexpr uint8_t DE_UINT8_SIZE = 1;
|
||||
static constexpr uint8_t DE_INT8_SIZE = 1;
|
||||
static constexpr uint8_t DE_UINT16_SIZE = 2;
|
||||
static constexpr uint8_t DE_INT16_SIZE = 2;
|
||||
static constexpr uint8_t DE_UINT32_SIZE = 4;
|
||||
static constexpr uint8_t DE_INT32_SIZE = 4;
|
||||
static constexpr uint8_t DE_INT64_SIZE = 8;
|
||||
static constexpr uint8_t DE_UINT64_SIZE = 8;
|
||||
static constexpr uint8_t DE_FLOAT32_SIZE = 4;
|
||||
static constexpr uint8_t DE_FLOAT64_SIZE = 8;
|
||||
inline static constexpr uint8_t SIZE_IN_BYTES[] = {0, // DE_UNKNOWN
|
||||
1, // DE_BOOL
|
||||
1, // DE_INT8
|
||||
1, // DE_UINT8
|
||||
2, // DE_INT16
|
||||
2, // DE_UINT16
|
||||
4, // DE_INT32
|
||||
4, // DE_UINT32
|
||||
8, // DE_INT64
|
||||
8, // DE_UINT64
|
||||
2, // DE_FLOAT16
|
||||
4, // DE_FLOAT32
|
||||
8, // DE_FLOAT64
|
||||
0}; // DE_STRING
|
||||
|
||||
inline static const char *TO_STRINGS[] = {"unknown", "bool", "int8", "uint8", "int16", "uint16", "int32",
|
||||
"uint32", "int64", "uint64", "float16", "float32", "float64", "string"};
|
||||
|
||||
inline static const char *PYBIND_TYPES[] = {"object", "bool", "int8", "uint8", "int16", "uint16", "int32",
|
||||
"uint32", "int64", "uint64", "float16", "float32", "double", "bytes"};
|
||||
|
||||
inline static const std::string PYBIND_FORMAT_DESCRIPTOR[] = {"", // DE_UNKNOWN
|
||||
py::format_descriptor<bool>::format(), // DE_BOOL
|
||||
py::format_descriptor<int8_t>::format(), // DE_INT8
|
||||
py::format_descriptor<uint8_t>::format(), // DE_UINT8
|
||||
py::format_descriptor<int16_t>::format(), // DE_INT16
|
||||
py::format_descriptor<uint16_t>::format(), // DE_UINT16
|
||||
py::format_descriptor<int32_t>::format(), // DE_INT32
|
||||
py::format_descriptor<uint32_t>::format(), // DE_UINT32
|
||||
py::format_descriptor<int64_t>::format(), // DE_INT64
|
||||
py::format_descriptor<uint64_t>::format(), // DE_UINT64
|
||||
"e", // DE_FLOAT16
|
||||
py::format_descriptor<float>::format(), // DE_FLOAT32
|
||||
py::format_descriptor<double>::format(), // DE_FLOAT64
|
||||
"S"}; // DE_STRING
|
||||
|
||||
inline static constexpr uint8_t CV_TYPES[] = {kCVInvalidType, // DE_UNKNOWN
|
||||
CV_8U, // DE_BOOL
|
||||
CV_8S, // DE_INT8
|
||||
CV_8U, // DE_UINT8
|
||||
CV_16S, // DE_INT16
|
||||
CV_16U, // DE_UINT16
|
||||
CV_32S, // DE_INT32
|
||||
kCVInvalidType, // DE_UINT32
|
||||
kCVInvalidType, // DE_INT64
|
||||
kCVInvalidType, // DE_UINT64
|
||||
CV_16F, // DE_FLOAT16
|
||||
CV_32F, // DE_FLOAT32
|
||||
CV_64F, // DE_FLOAT64
|
||||
kCVInvalidType}; // DE_STRING
|
||||
|
||||
// No arg constructor to create an unknown shape
|
||||
DataType() : type_(DE_UNKNOWN) {}
|
||||
|
@ -160,6 +207,8 @@ class DataType {
|
|||
|
||||
// True only when this type is DE_BOOL.
bool IsBool() const { return type_ == DataType::DE_BOOL; }
|
||||
|
||||
// True for every type except DE_STRING.
// NOTE(review): as written this also reports DE_UNKNOWN and DE_BOOL as numeric - confirm that is intended.
bool IsNumeric() const { return type_ != DataType::DE_STRING; }
|
||||
|
||||
// Returns the underlying DataType::Type enumerator stored in this object.
Type value() const { return type_; }
|
||||
|
||||
private:
|
||||
|
@ -226,6 +275,11 @@ inline bool DataType::IsCompatible<uint8_t>() const {
|
|||
return type_ == DataType::DE_UINT8;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool DataType::IsCompatible<std::string_view>() const {
|
||||
return type_ == DataType::DE_STRING;
|
||||
}
|
||||
|
||||
template <>
|
||||
inline bool DataType::IsLooselyCompatible<bool>() const {
|
||||
return type_ == DataType::DE_BOOL;
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
*/
|
||||
#include "dataset/core/tensor.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
|
@ -60,7 +61,7 @@ Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned ch
|
|||
if (data != nullptr) {
|
||||
// Given the shape/type of this tensor, compute the data size and copy in the input bytes.
|
||||
int64_t byte_size = this->SizeInBytes();
|
||||
static_cast<void>(this->StartAddr()); // Allocates data_ inside itself
|
||||
static_cast<void>(this->GetMutableBuffer()); // Allocates data_ inside itself
|
||||
if (data_ != nullptr) {
|
||||
int ret_code = memcpy_s(data_, byte_size, data, byte_size);
|
||||
if (ret_code != 0) {
|
||||
|
@ -75,7 +76,7 @@ Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned ch
|
|||
// Move constructor: takes over the shape, type, buffer and allocator of `other`, then invalidates `other`.
// data_end_ is carried over as well, mirroring what the move-assignment operator does.
Tensor::Tensor(Tensor &&other) noexcept
    : shape_(other.shape()),
      type_(other.type()),
      data_(other.GetMutableBuffer()),
      data_allocator_(std::move(other.data_allocator_)) {
  // Assigned in the body (not the initializer list) so it always runs after the GetMutableBuffer()
  // call above, regardless of the order the members are declared in.
  data_end_ = other.data_end_;
  other.Invalidate();
}
|
||||
|
@ -84,7 +85,7 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept {
|
|||
if (&other != this) {
|
||||
shape_ = other.shape();
|
||||
type_ = other.type();
|
||||
data_ = other.StartAddr();
|
||||
data_ = other.GetMutableBuffer();
|
||||
data_end_ = other.data_end_;
|
||||
data_allocator_ = std::move(other.data_allocator_);
|
||||
other.Invalidate();
|
||||
|
@ -92,6 +93,37 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept {
|
|||
return *this;
|
||||
}
|
||||
|
||||
// Constructs a DE_STRING tensor from `strings`.
// Layout written into data_: one offset_t per string (the end index of that string inside the
// character area, i.e. the position of its '\0'), followed by every string including its '\0'.
// @param strings elements of the tensor; the tensor starts out 1-D with strings.size() elements
// @param shape if known, the tensor is reshaped to it after the data is written
Tensor::Tensor(const std::vector<std::string> &strings, const TensorShape &shape)
    : Tensor(TensorShape({static_cast<dsize_t>(strings.size())}), DataType(DataType::DE_STRING)) {
  // Sum the lengths directly in dsize_t; std::accumulate seeded with the int literal 0 would
  // carry the running total in an int.
  dsize_t total_length = 0;
  for (const auto &str : strings) {
    total_length += static_cast<dsize_t>(str.length());
  }

  const dsize_t num_elements = shape_.NumOfElements();
  // Per element: one offset plus one terminating '\0'; plus all the characters.
  const dsize_t num_bytes = (kOffsetSize + 1) * num_elements + total_length;

  data_ = data_allocator_->allocate(num_bytes);

  // Without a buffer nothing can be written; data_ stays null for the accessors' own null checks.
  if (data_ != nullptr) {
    auto offset_arr = reinterpret_cast<offset_t *>(data_);
    uchar *buf = GetStringsBuffer();

    // Bytes still free in the character area; used as the destination bound of each copy.
    dsize_t remaining = total_length + num_elements;
    // Index, inside the character area, at which the next string begins.
    offset_t next_start = 0;
    uint32_t i = 0;
    for (const auto &str : strings) {
      const auto length_with_null = str.length() + 1;
      // end index of a string = its start index + its length (which is where its '\0' sits)
      offset_arr[i++] = next_start + static_cast<offset_t>(str.length());
      // insert actual string, including the '\0'
      (void)memcpy_s(buf, remaining, str.c_str(), length_with_null);
      buf += length_with_null;
      remaining -= length_with_null;
      next_start += static_cast<offset_t>(length_with_null);
    }
    this->data_end_ = buf;
    DS_ASSERT(remaining == 0);
  }

  // A constructor cannot return the Status of Reshape, so it is explicitly discarded.
  if (shape.known()) (void)Tensor::Reshape(shape);
}
|
||||
|
||||
Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape,
|
||||
DataType type, const unsigned char *data) {
|
||||
if (!shape.known()) {
|
||||
|
@ -120,8 +152,28 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl
|
|||
}
|
||||
return Status::OK(); // returns base-class shared_ptr
|
||||
}
|
||||
// Identity helper. Because the parameter is a std::string taken by value, a caller can pass any
// object implicitly convertible to std::string (this file passes a py::bytes) and get a std::string back.
std::string to(std::string x) { return x; }
|
||||
// Helper function to create a string tensor from a numpy array of bytes strings.
// The elements are read from a flattened, C-order array obtained with numpy's ravel(), so the
// caller's array is never resized in place and the element order matches the row-major shape.
// @param ptr output argument to hold the created Tensor
// @param arr numpy array whose elements can be cast to py::bytes
// @return Status Code
Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr) {
  std::vector<dsize_t> shape;
  shape.reserve(static_cast<size_t>(arr.ndim()));
  for (dsize_t i = 0; i < arr.ndim(); i++) {
    shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
  }

  const py::array flat = py::array(arr.attr("ravel")());

  std::vector<std::string> strings;
  strings.reserve(static_cast<size_t>(flat.size()));
  for (auto item : flat) {
    // `to` turns the py::bytes into a std::string; emplace_back moves it into the vector.
    strings.emplace_back(to(py::cast<py::bytes>(item)));
  }

  return CreateTensor(ptr, strings, TensorShape{shape});
}
|
||||
|
||||
Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
|
||||
if (DataType::FromNpArray(arr) == DataType::DE_STRING) {
|
||||
return CreateTensorFromNumpyString(ptr, arr);
|
||||
}
|
||||
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
|
||||
*ptr = std::allocate_shared<Tensor>(*alloc, TensorShape({}), DataType(DataType::DE_UNKNOWN));
|
||||
|
||||
|
@ -138,7 +190,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
|
|||
|
||||
std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool();
|
||||
(*ptr)->data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
|
||||
static_cast<void>((*ptr)->StartAddr());
|
||||
static_cast<void>((*ptr)->GetMutableBuffer());
|
||||
int64_t byte_size = (*ptr)->SizeInBytes();
|
||||
unsigned char *data = static_cast<unsigned char *>(arr.request().ptr);
|
||||
if ((*ptr)->data_ == nullptr) {
|
||||
|
@ -173,6 +225,13 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
|
|||
return Status::OK(); // returns base-class shared_ptr
|
||||
}
|
||||
|
||||
// Factory for string tensors: allocates a Tensor through the global tensor allocator, forwarding
// `strings` and `shape` to the Tensor(strings, shape) constructor, and stores it in *ptr.
// @param ptr output argument to hold the created Tensor
// @param strings elements of the tensor
// @param shape shape of the tensor
// @return Status Code (always OK once the allocation call returns)
Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
                            const TensorShape &shape) {
  const TensorAlloc *tensor_alloc = GlobalContext::Instance()->tensor_allocator();
  std::shared_ptr<Tensor> created = std::allocate_shared<Tensor>(*tensor_alloc, strings, shape);
  *ptr = std::move(created);
  return Status::OK();
}
|
||||
|
||||
// Memcpy the given strided array's used part to consecutive memory
|
||||
// Consider a 3-d array
|
||||
// A[(i * shape[1] + j) * shape[2] + k] = B[i][j][k] = C[i * strides[0] + j * strides[1] + k * strides[2]]
|
||||
|
@ -264,6 +323,12 @@ void Tensor::PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) c
|
|||
|
||||
CASE_PRINT(DataType::DE_FLOAT64, double);
|
||||
|
||||
case DataType::DE_STRING: {
|
||||
std::string_view o{""};
|
||||
GetItemAt(&o, index);
|
||||
out << "\"" << o << "\"";
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
out << "?";
|
||||
break;
|
||||
|
@ -324,12 +389,12 @@ Status Tensor::ToFlatIndex(const std::vector<dsize_t> &index, dsize_t *flat_inde
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
const unsigned char *Tensor::StartAddr() const {
|
||||
const unsigned char *Tensor::GetBuffer() const {
|
||||
// This version cannot modify anything. data_ could possibly be null.
|
||||
return data_;
|
||||
}
|
||||
|
||||
unsigned char *Tensor::StartAddr() {
|
||||
unsigned char *Tensor::GetMutableBuffer() {
|
||||
if (!shape_.known() || type_ == DataType::DE_UNKNOWN) {
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -381,6 +446,25 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
|
|||
dsize_t flat_idx;
|
||||
RETURN_IF_NOT_OK(ToFlatIndex(index, &flat_idx));
|
||||
*ptr = reinterpret_cast<T *>(data_ + flat_idx * type_.SizeInBytes());
|
||||
|
||||
return Status::OK();
|
||||
} else {
|
||||
std::string err = "data type not compatible";
|
||||
RETURN_STATUS_UNEXPECTED(err);
|
||||
}
|
||||
}
|
||||
|
||||
Status Tensor::GetItemPtr(uchar **ptr, const std::vector<dsize_t> &index, offset_t *length) const {
|
||||
if (type_ == DataType::DE_STRING) {
|
||||
if (data_ == nullptr) {
|
||||
std::string err = "Data is not allocated yet";
|
||||
RETURN_STATUS_UNEXPECTED(err);
|
||||
}
|
||||
dsize_t flat_idx;
|
||||
RETURN_IF_NOT_OK(ToFlatIndex(index, &flat_idx));
|
||||
offset_t length_temp = 0;
|
||||
RETURN_IF_NOT_OK(GetStringAt(flat_idx, ptr, &length_temp));
|
||||
if (length != nullptr) *length = length_temp;
|
||||
return Status::OK();
|
||||
} else {
|
||||
std::string err = "data type not compatible";
|
||||
|
@ -389,23 +473,27 @@ Status Tensor::GetItemPtr(T **ptr, const std::vector<dsize_t> &index) const {
|
|||
}
|
||||
|
||||
// Computes the address of the element reached by the (possibly partial) index `ind`, and the shape
// of the dimensions `ind` does not cover.
// @param ind leading index values; missing trailing dimensions are treated as 0
// @param start_addr_of_index output: buffer address of that element
// @param remaining output: shape made of the dimensions after the first ind.size() ones
// @return Status Code - error for string tensors, null outputs, an index longer than the rank,
//         an index rejected by ToFlatIndex, or a tensor without a buffer
Status Tensor::StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining) {
  if (type() == DataType::DE_STRING) {
    RETURN_STATUS_UNEXPECTED("StartAddrOfIndex does not support string tensors yet.");
  }
  RETURN_UNEXPECTED_IF_NULL(start_addr_of_index);
  RETURN_UNEXPECTED_IF_NULL(remaining);

  std::vector<dsize_t> t_shape = shape().AsVector();
  // Guard the iterator arithmetic below: begin() + ind.size() must not run past end().
  CHECK_FAIL_RETURN_UNEXPECTED(ind.size() <= t_shape.size(), "Index has more dimensions than the tensor.");
  std::vector<dsize_t> r(t_shape.begin() + ind.size(), t_shape.end());
  *remaining = TensorShape(r);

  ind.resize(this->Rank(), 0);  // same as -> while (ind.size() < this->Rank()) ind.push_back(0);
  dsize_t flat_ind;
  RETURN_IF_NOT_OK(ToFlatIndex(ind, &flat_ind));

  // If GetMutableBuffer() returns null we flag it as an error; this sanity check should only
  // trigger when the tensor failed to allocate memory.
  unsigned char *base = GetMutableBuffer();
  if (base == nullptr) {
    RETURN_STATUS_UNEXPECTED("Invalid GetBuffer in Tensor, got nullptr");
  }
  *start_addr_of_index = base + flat_ind * this->type().SizeInBytes();
  return Status::OK();
}
|
||||
|
||||
Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_ptr<Tensor> &tensor) {
|
||||
std::string err_msg;
|
||||
err_msg += (this->type() == DataType::DE_STRING) ? "[Tensor] Cannot batch tensors of type string\n" : "";
|
||||
err_msg += (!this->shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : "";
|
||||
err_msg += (ind.size() + tensor->Rank() != this->Rank()) ? "[Tensor] incorrect index\n" : "";
|
||||
err_msg += tensor->type().SizeInBytes() != this->type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : "";
|
||||
|
@ -418,7 +506,8 @@ Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_p
|
|||
RETURN_STATUS_UNEXPECTED(err_msg);
|
||||
} else {
|
||||
if (start_addr_of_ind != nullptr) {
|
||||
int ret_code = memcpy_s(start_addr_of_ind, tensor->SizeInBytes(), tensor->StartAddr(), tensor->SizeInBytes());
|
||||
int ret_code =
|
||||
memcpy_s(start_addr_of_ind, tensor->SizeInBytes(), tensor->GetMutableBuffer(), tensor->SizeInBytes());
|
||||
if (ret_code == 0) {
|
||||
return Status::OK();
|
||||
} else {
|
||||
|
@ -446,21 +535,20 @@ Status Tensor::ExpandDim(const dsize_t &axis) {
|
|||
}
|
||||
|
||||
std::vector<dsize_t> Tensor::Strides() {
|
||||
std::vector<dsize_t> strides(Rank());
|
||||
dsize_t count = shape_.NumOfElements();
|
||||
for (dsize_t i = 0; i < Rank(); i++) {
|
||||
count /= shape_[i];
|
||||
strides[i] = type_.SizeInBytes() * count;
|
||||
}
|
||||
std::vector<dsize_t> strides = shape_.Strides();
|
||||
uint8_t size = type_.SizeInBytes();
|
||||
std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
|
||||
return strides;
|
||||
}
|
||||
|
||||
Status Tensor::GetBufferInfo(Tensor &t, py::buffer_info *out) {
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(t.type().IsNumeric(), "Cannot use GetBufferInfo on tensor of strings.");
|
||||
|
||||
std::string format_desc = t.type().GetPybindFormat();
|
||||
if (format_desc.empty()) {
|
||||
RETURN_STATUS_UNEXPECTED("Cannot convert DE type tp pybind format");
|
||||
}
|
||||
*out = py::buffer_info(t.StartAddr(), /* Pointer to buffer */
|
||||
*out = py::buffer_info(t.GetMutableBuffer(), /* Pointer to buffer */
|
||||
t.type().SizeInBytes(), /* Size of one scalar */
|
||||
format_desc, /* Python struct-style format descriptor */
|
||||
t.Rank(), /* Number of dimensions */
|
||||
|
@ -495,6 +583,18 @@ Status Tensor::GetItemAt(T *o, const std::vector<dsize_t> &index) const {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
// Fetches the string stored at `index` as a non-owning view.
// @param o output argument; on success it is set to a std::string_view over the string at `index`
// @param index position of the element
// @return Status Code - error if the tensor has no data, `o` is null, the type is not DE_STRING,
//         or GetItemPtr fails
Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const {
  RETURN_UNEXPECTED_IF_NULL(data_);
  RETURN_UNEXPECTED_IF_NULL(o);
  CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Type is not DE_STRING");

  uchar *str_start = nullptr;
  offset_t str_length = 0;
  RETURN_IF_NOT_OK(GetItemPtr(&str_start, index, &str_length));
  *o = std::string_view(reinterpret_cast<const char *>(str_start), str_length);
  return Status::OK();
}
|
||||
// return data as numpy, should return status
|
||||
Status Tensor::GetDataAsNumpy(py::array *data) {
|
||||
RETURN_UNEXPECTED_IF_NULL(data_);
|
||||
|
@ -523,11 +623,36 @@ Status Tensor::GetDataAsNumpy(py::array *data) {
|
|||
*data = py::array_t<float>(shape_.AsVector(), reinterpret_cast<float *>(data_));
|
||||
} else if (type_ == DataType::DE_FLOAT64) {
|
||||
*data = py::array_t<double>(shape_.AsVector(), reinterpret_cast<double *>(data_));
|
||||
} else if (type_ == DataType::DE_STRING) {
|
||||
GetDataAsNumpyStrings(data);
|
||||
} else {
|
||||
RETURN_STATUS_UNEXPECTED("Got unexpected type when returning numpy");
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
// Builds a numpy array of fixed-width bytes strings ("S<width>") from this string tensor.
// Every element gets a zero-filled slot as wide as the longest string; the strings are copied into
// a temporary buffer with that layout, which is released again before returning.
// @param data output argument receiving the numpy array
// @return Status Code - error if `data` is null, the tensor has no data or is not DE_STRING, or
//         the temporary buffer cannot be allocated
Status Tensor::GetDataAsNumpyStrings(py::array *data) {
  RETURN_UNEXPECTED_IF_NULL(data);
  RETURN_UNEXPECTED_IF_NULL(data_);
  CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Type is not DE_STRING");

  // Slot width = longest string. The explicit template argument keeps std::max well-formed on
  // platforms where size_t and uint64_t are distinct types.
  uint64_t max_len = 0;
  for (auto itr = begin<std::string_view>(); itr != end<std::string_view>(); itr++) {
    max_len = std::max<uint64_t>((*itr).length(), max_len);
  }
  // Keep at least one byte per slot so that an all-empty tensor does not produce a zero-width
  // field backed by a zero-sized buffer.
  max_len = std::max<uint64_t>(max_len, 1);

  const uint64_t total_size = shape_.NumOfElements() * max_len;
  char *tmp_data = reinterpret_cast<char *>(data_allocator_->allocate(total_size));
  if (tmp_data == nullptr) RETURN_STATUS_UNEXPECTED("Cannot create temp array.");

  // Hands the temporary buffer back to the allocator on every exit path, including an exception
  // thrown while the numpy array is being created.
  auto release_tmp = [this](char *p) { data_allocator_->deallocate(reinterpret_cast<uchar *>(p)); };
  std::unique_ptr<char, decltype(release_tmp)> tmp_guard(tmp_data, release_tmp);

  memset(tmp_data, 0, total_size);

  uint64_t i = 0;
  for (auto itr = begin<std::string_view>(); itr != end<std::string_view>(); itr++) {
    const auto &str = *itr;
    if (!str.empty()) {
      // The destination bound is the slot width, not the size of the whole buffer.
      (void)memcpy_s(tmp_data + i * max_len, max_len, str.data(), str.length());
    }
    i++;
  }

  auto strides = shape_.Strides();
  std::transform(strides.begin(), strides.end(), strides.begin(),
                 [&max_len](const auto &s) { return s * max_len; });
  *data = py::array(py::dtype("S" + std::to_string(max_len)), shape_.AsVector(), strides, tmp_data);
  return Status::OK();
}
|
||||
|
||||
// Replaces shape_ with the result of shape_.Squeeze(); nothing else is modified in this function.
void Tensor::Squeeze() { shape_ = shape_.Squeeze(); }
|
||||
|
||||
|
@ -647,5 +772,19 @@ Status Tensor::GetFloatAt(T *o, const std::vector<dsize_t> &index) const {
|
|||
}
|
||||
return Status::OK();
|
||||
}
|
||||
// Locates the string stored at flat position `index` of a DE_STRING tensor.
// The buffer starts with one offset_t per element holding the end index of that element's string
// inside the character area returned by GetStringsBuffer().
// @param index flat index of the element
// @param string_start output: pointer to the first character of the string
// @param length output: length of the string (end index - start index)
// @return Status Code - error if the type is not DE_STRING, the tensor has no data, an output
//         pointer is null, or `index` is outside [0, number of elements)
Status Tensor::GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const {
  CHECK_FAIL_RETURN_UNEXPECTED(type_ == DataType::DE_STRING, "Type is not string");
  RETURN_UNEXPECTED_IF_NULL(data_);
  RETURN_UNEXPECTED_IF_NULL(string_start);
  RETURN_UNEXPECTED_IF_NULL(length);
  // The offset table has one entry per element; reject anything that would read outside it.
  CHECK_FAIL_RETURN_UNEXPECTED(index >= 0 && index < shape_.NumOfElements(), "Index is out of bounds");

  auto *offset_ptr = reinterpret_cast<offset_t *>(data_);  // offsets starts here
  const offset_t end = offset_ptr[index];
  // string starts at where the previous string ends + 1 (the first string starts at 0)
  const offset_t start = (index != 0) ? offset_ptr[index - 1] + 1 : 0;
  uchar *buf = GetStringsBuffer();  // string data starts here
  *string_start = buf + start;
  *length = end - start;
  return Status::OK();
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -47,8 +47,6 @@ using TensorRow = std::vector<std::shared_ptr<Tensor>>; // A row is a set of
|
|||
using TensorTable = std::vector<TensorRow>; // The table of tensors is a vector of rows
|
||||
using TensorQTable = std::deque<TensorRow>; // A different flavour of tensor table, this one has queue functionality
|
||||
|
||||
// Tensor base class which holds the data in an unsigned char* buffer.
|
||||
|
||||
class Tensor {
|
||||
public:
|
||||
Tensor() = delete;
|
||||
|
@ -74,6 +72,27 @@ class Tensor {
|
|||
|
||||
Tensor &operator=(Tensor &&other) noexcept;
|
||||
|
||||
// type of offset values to store strings information
|
||||
using offset_t = uint32_t;
|
||||
// const of the size of the offset variable
|
||||
static constexpr uint8_t kOffsetSize = sizeof(offset_t);
|
||||
// Tensor base class which holds the data in an unsigned char* buffer.
|
||||
|
||||
// Construct a scalar string Tensor
|
||||
// Delegates to the vector-of-strings constructor with a one-element vector and a scalar shape.
explicit Tensor(const std::string &str) : Tensor(std::vector<std::string>{str}, TensorShape::CreateScalar()) {}
|
||||
|
||||
// Construct a tensor from a list of strings. Reshape the tensor with `shape` if given, otherwise assume the shape is
|
||||
// the size of the vector `strings`.
|
||||
// The memory layout of a Tensor of strings consists of the Offset_array followed by the strings.
|
||||
// OFFSET1, OFFSET2, ... String1, String2, ...
|
||||
// The value of each offset is the end index of the corresponding string
|
||||
// Offsets are of type offset_t
|
||||
// strings will be null-terminated
|
||||
// example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
|
||||
// 3 6 a b c \0 d e \0
|
||||
explicit Tensor(const std::vector<std::string> &strings,
|
||||
const TensorShape &shape = TensorShape::CreateUnknownRankShape());
|
||||
|
||||
// A static factory method to create the given flavour of derived Tensor
|
||||
// Returns the base class reference for the Tensor.
|
||||
// @param ptr output argument to hold the created Tensor of given tensor_impl
|
||||
|
@ -91,6 +110,17 @@ class Tensor {
|
|||
// @return Status Code
|
||||
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr);
|
||||
|
||||
// Helper function to create a tensor from Numpy of strings
|
||||
static Status CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr);
|
||||
|
||||
// A static factory method to create a Tensor from a given list of strings.
|
||||
// @param ptr output argument to hold the created Tensor
|
||||
// @param strings elements of the tensor
|
||||
// @param shape shape of the tensor
|
||||
// @return Status Code
|
||||
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
|
||||
const TensorShape &shape = TensorShape::CreateUnknownRankShape());
|
||||
|
||||
// Copy raw data of a array based on shape and strides to the destination pointer
|
||||
// @param dst Pointer to the destination array where the content is to be copied
|
||||
// @param src Pointer to the source of strided array to be copied
|
||||
|
@ -116,6 +146,11 @@ class Tensor {
|
|||
template <typename T>
|
||||
Status GetItemAt(T *o, const std::vector<dsize_t> &index) const;
|
||||
|
||||
// Get string located at `index`.
|
||||
// @param index vector<dsize_t>
|
||||
// @return return std::string_view specified at index
|
||||
Status GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const;
|
||||
|
||||
template <typename T>
|
||||
Status GetUnsignedIntAt(T *o, const std::vector<dsize_t> &index) const;
|
||||
|
||||
|
@ -131,26 +166,44 @@ class Tensor {
|
|||
// @param value of type `T`
|
||||
template <typename T>
|
||||
Status SetItemAt(const std::vector<dsize_t> &index, const T &value) {
|
||||
static_cast<void>(StartAddr());
|
||||
static_cast<void>(GetMutableBuffer());
|
||||
T *ptr = nullptr;
|
||||
RETURN_IF_NOT_OK(GetItemPtr<T>(&ptr, index));
|
||||
*ptr = value;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// fill tensor with Zeros
|
||||
// set string item at location specified by index
|
||||
// @param index
|
||||
// @param value of type std::string
|
||||
Status SetItemAt(const std::vector<dsize_t> &index, const std::string &value) {
|
||||
RETURN_UNEXPECTED_IF_NULL(data_);
|
||||
uchar *ptr = nullptr;
|
||||
offset_t length = 0;
|
||||
RETURN_IF_NOT_OK(GetItemPtr(&ptr, index, &length));
|
||||
if (value.length() != length) {
|
||||
RETURN_STATUS_UNEXPECTED("Length of the new string does not match the item.");
|
||||
}
|
||||
memcpy_s(reinterpret_cast<char *>(ptr), length, value.c_str(), length);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
// fill tensor with Zeros. Does not support strings.
|
||||
Status Zero() {
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use Zero on tensor of strings..");
|
||||
dsize_t size = SizeInBytes();
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(memset_sp(StartAddr(), size, 0, size) == 0, "Failed to fill tensor with zeroes.");
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(memset_sp(GetMutableBuffer(), size, 0, size) == 0,
|
||||
"Failed to fill tensor with zeroes.");
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Fill all elements in the Tensor with the given value of type `T`
|
||||
// Fill all elements in the Tensor with the given value of type `T`. Does not support strings.
|
||||
// @tparam T
|
||||
// @param value
|
||||
template <typename T>
|
||||
Status Fill(const T &value) {
|
||||
static_cast<void>(StartAddr());
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use fill on tensor of strings.");
|
||||
static_cast<void>(GetMutableBuffer());
|
||||
int64_t cellSize = type_.SizeInBytes();
|
||||
if ((data_ != nullptr) && type_.IsCompatible<T>()) {
|
||||
for (dsize_t i = 0; i < Size(); i++) {
|
||||
|
@ -177,7 +230,10 @@ class Tensor {
|
|||
dsize_t Size() const { return shape().NumOfElements(); }
|
||||
|
||||
// @return the number of bytes this tensor needs
|
||||
dsize_t SizeInBytes() const { return Size() * type_.SizeInBytes(); }
|
||||
dsize_t SizeInBytes() const {
|
||||
if (data_end_ == nullptr) return type_.SizeInBytes() * shape_.NumOfElements();
|
||||
return data_end_ - data_;
|
||||
}
|
||||
|
||||
// @return the rank of the tensor
|
||||
dsize_t Rank() const { return shape().Rank(); }
|
||||
|
@ -185,12 +241,12 @@ class Tensor {
|
|||
// Get the starting memory address as a constant for the data of the tensor. This potentially
|
||||
// drives an allocation of the data area.
|
||||
// @return const unsigned char*
|
||||
const unsigned char *StartAddr() const;
|
||||
const unsigned char *GetBuffer() const;
|
||||
|
||||
// Get the starting memory address for the data of the tensor. This potentially
|
||||
// drives an allocation of the data area.
|
||||
// @return unsigned char*
|
||||
unsigned char *StartAddr();
|
||||
unsigned char *GetMutableBuffer();
|
||||
|
||||
// Getter of the type
|
||||
// @return
|
||||
|
@ -236,12 +292,12 @@ class Tensor {
|
|||
|
||||
virtual void Squeeze();
|
||||
|
||||
// Calculates the strides of the Tensor
|
||||
// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
|
||||
// The strides will be {4,2,1}.
|
||||
// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
|
||||
// The strides will be {16,8,4}.
|
||||
// @return vector of integers
|
||||
/// Calculates the strides of the Tensor
|
||||
/// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
|
||||
/// The strides will be {4,2,1}.
|
||||
/// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
|
||||
/// The strides will be {16,8,4}.
|
||||
/// @return vector of integers
|
||||
std::vector<dsize_t> Strides();
|
||||
|
||||
std::string ToString() {
|
||||
|
@ -255,12 +311,14 @@ class Tensor {
|
|||
// @return Status code
|
||||
Status GetDataAsNumpy(py::array *data);
|
||||
|
||||
Status GetDataAsNumpyStrings(py::array *data);
|
||||
|
||||
static Status GetBufferInfo(Tensor &t, py::buffer_info *out);
|
||||
|
||||
// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor
|
||||
// The order of the elements follows the memory layout (i.e., row-major): [[1,2,3],[4,5,6]] --> 1,2,3,4,5,6
|
||||
// @tparam T type of values in the Tensor Iterator
|
||||
template <typename T>
|
||||
template <typename T, bool = true>
|
||||
class TensorIterator {
|
||||
public:
|
||||
using iterator_category = std::random_access_iterator_tag;
|
||||
|
@ -271,11 +329,14 @@ class Tensor {
|
|||
|
||||
explicit TensorIterator(uchar *ptr = nullptr) { ptr_ = reinterpret_cast<T *>(ptr); }
|
||||
|
||||
TensorIterator(const TensorIterator<T> &raw_iterator) = default;
|
||||
TensorIterator(const TensorIterator<T> &raw_iterator) { ptr_ = raw_iterator.ptr_; }
|
||||
|
||||
~TensorIterator() = default;
|
||||
|
||||
TensorIterator<T> &operator=(const TensorIterator<T> &rhs) = default;
|
||||
TensorIterator<T> &operator=(const TensorIterator<T> &rhs) {
|
||||
ptr_ = rhs.ptr_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
TensorIterator<T> &operator=(T *rhs) {
|
||||
ptr_ = rhs;
|
||||
|
@ -346,6 +407,99 @@ class Tensor {
|
|||
T *ptr_;
|
||||
};
|
||||
|
||||
// Specialization of TensorIterator for strings. It returns std::string_view for every item.
|
||||
// @tparam DUMMY, used to mbe able to specialize the inner class
|
||||
template <bool DUMMY>
|
||||
class TensorIterator<std::string_view, DUMMY> {
|
||||
public:
|
||||
using iterator_category = std::random_access_iterator_tag;
|
||||
using value_type = std::string_view;
|
||||
using difference_type = ptrdiff_t;
|
||||
using pointer = std::string_view *;
|
||||
using reference = std::string_view &;
|
||||
|
||||
explicit TensorIterator(uchar *offset = nullptr, const uchar *buf = nullptr, dsize_t index = 0) {
|
||||
offset_ = reinterpret_cast<offset_t *>(offset);
|
||||
buf_ = reinterpret_cast<const char *>(buf);
|
||||
index_ = index;
|
||||
}
|
||||
|
||||
TensorIterator(const TensorIterator<std::string_view, DUMMY> &raw_iterator) {
|
||||
offset_ = raw_iterator.offset_;
|
||||
buf_ = raw_iterator.buf_;
|
||||
index_ = raw_iterator.index_;
|
||||
}
|
||||
|
||||
~TensorIterator() = default;
|
||||
|
||||
bool operator==(const TensorIterator<std::string_view> &rhs) {
|
||||
return buf_ == rhs.buf_ && offset_ == rhs.offset_ && index_ == rhs.index_;
|
||||
}
|
||||
|
||||
bool operator!=(const TensorIterator<std::string_view> &rhs) { return !(*this == rhs); }
|
||||
|
||||
operator bool() const { return offset_ != nullptr; }
|
||||
|
||||
std::string_view operator*() const {
|
||||
offset_t start = 0;
|
||||
if (index_ != 0) start = offset_[index_ - 1] + 1;
|
||||
return std::string_view{buf_ + start};
|
||||
}
|
||||
|
||||
TensorIterator<std::string_view> &operator+=(const dsize_t &inc) {
|
||||
index_ += inc;
|
||||
return *this;
|
||||
}
|
||||
|
||||
TensorIterator<std::string_view> &operator-=(const dsize_t &inc) {
|
||||
index_ -= inc;
|
||||
return *this;
|
||||
}
|
||||
|
||||
TensorIterator<std::string_view> &operator++() {
|
||||
++index_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
TensorIterator<std::string_view> &operator--() {
|
||||
--index_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
TensorIterator<std::string_view> operator++(int) {
|
||||
auto temp(*this);
|
||||
++index_;
|
||||
return temp;
|
||||
}
|
||||
|
||||
TensorIterator<std::string_view> operator--(int) {
|
||||
auto temp(*this);
|
||||
--index_;
|
||||
return temp;
|
||||
}
|
||||
|
||||
TensorIterator<std::string_view> operator+(const dsize_t &inc) {
|
||||
auto oldPtr = index_;
|
||||
index_ += inc;
|
||||
auto temp(*this);
|
||||
index_ = oldPtr;
|
||||
return temp;
|
||||
}
|
||||
|
||||
TensorIterator<std::string_view> operator-(const dsize_t &inc) {
|
||||
auto oldPtr = index_;
|
||||
index_ -= inc;
|
||||
auto temp(*this);
|
||||
index_ = oldPtr;
|
||||
return temp;
|
||||
}
|
||||
|
||||
protected:
|
||||
dsize_t index_;
|
||||
offset_t *offset_;
|
||||
const char *buf_;
|
||||
};
|
||||
|
||||
// Return a TensorIterator that points to the start of the Tensor.
|
||||
// It's the user responsibility to use the correct type that matches the Tensor type
|
||||
// @tparam T The type of values in the Tensor
|
||||
|
@ -391,6 +545,22 @@ class Tensor {
|
|||
template <typename T>
|
||||
Status GetItemPtr(T **, const std::vector<dsize_t> &index) const;
|
||||
|
||||
// Get pointer to string located at `index` and the length of string
|
||||
// @param index vector<dsize_t>
|
||||
// @return return a pointer to the string specified at index and the length of the string
|
||||
Status GetItemPtr(uchar **, const std::vector<dsize_t> &index, offset_t *length = nullptr) const;
|
||||
|
||||
// Given a flat index of an item string, return the start and length of the item
|
||||
// @param index flat index of the item
|
||||
// @return start address of the string
|
||||
// @return length of the string
|
||||
Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const;
|
||||
|
||||
// Skips the offsets and returns the start of the buffer where the real strings are stored. Caller needs to check if the
|
||||
// tensor's type is a string, otherwise undefined address would be returned.
|
||||
// @return address of the first string of the tensor.
|
||||
uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements(); }
|
||||
|
||||
// all access to shape_ should be via shape
|
||||
TensorShape shape_;
|
||||
// data type of tensor
|
||||
|
@ -402,6 +572,16 @@ class Tensor {
|
|||
// pointer to the end of the physical data
|
||||
unsigned char *data_end_ = nullptr;
|
||||
};
|
||||
template <>
|
||||
inline Tensor::TensorIterator<std::string_view> Tensor::begin<std::string_view>() {
|
||||
uchar *buf = GetStringsBuffer();
|
||||
return TensorIterator<std::string_view>(data_, buf);
|
||||
}
|
||||
template <>
|
||||
inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() {
|
||||
uchar *buf = GetStringsBuffer();
|
||||
return TensorIterator<std::string_view>(data_, buf, shape_.NumOfElements());
|
||||
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
#endif // DATASET_CORE_TENSOR_H_
|
||||
|
|
|
@ -215,5 +215,17 @@ TensorShape TensorShape::Squeeze() const {
|
|||
}
|
||||
return TensorShape(new_shape);
|
||||
}
|
||||
std::vector<dsize_t> TensorShape::Strides() {
|
||||
std::vector<dsize_t> strides(Rank());
|
||||
dsize_t count = NumOfElements();
|
||||
for (dsize_t i = 0; i < Rank(); i++) {
|
||||
if (raw_shape_[i] != 0)
|
||||
count /= raw_shape_[i];
|
||||
else
|
||||
count = 0;
|
||||
strides[i] = count;
|
||||
}
|
||||
return strides;
|
||||
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -156,6 +156,8 @@ class TensorShape {
|
|||
|
||||
TensorShape Squeeze() const;
|
||||
|
||||
std::vector<dsize_t> Strides();
|
||||
|
||||
private:
|
||||
// True if known and valid shape, false otherwise
|
||||
bool known_;
|
||||
|
|
|
@ -74,6 +74,10 @@ Status BatchOp::operator()() {
|
|||
std::unique_ptr<TensorQTable> table = std::make_unique<TensorQTable>();
|
||||
child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0);
|
||||
RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
|
||||
for (const auto &t : new_row) {
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(t->type().IsNumeric(),
|
||||
"[Batch ERROR] Batch does not support Tensor of type string yet.");
|
||||
}
|
||||
RETURN_IF_NOT_OK(DatasetOp::AssignColMapFromChild()); // must come after the first fetch above
|
||||
int32_t cur_batch_size = 0;
|
||||
RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(0, 0, 0)));
|
||||
|
@ -445,8 +449,8 @@ Status BatchOp::PadHelper(std::shared_ptr<Tensor> src, std::shared_ptr<Tensor> d
|
|||
src_flat_ind += src_s[i] * cur_ind[i];
|
||||
dst_flat_ind += dst_s[i] * cur_ind[i];
|
||||
}
|
||||
unsigned char *src_addr = src->StartAddr() + src_flat_ind * type_size;
|
||||
unsigned char *dst_addr = dst->StartAddr() + dst_flat_ind * type_size;
|
||||
unsigned char *src_addr = src->GetMutableBuffer() + src_flat_ind * type_size;
|
||||
unsigned char *dst_addr = dst->GetMutableBuffer() + dst_flat_ind * type_size;
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(memcpy_s(dst_addr, len, src_addr, len) == 0, "memcpy error");
|
||||
} else { // not the last dimension, keep doing recursion
|
||||
dsize_t min_ind = std::min(dst->shape()[cur_dim], src->shape()[cur_dim]);
|
||||
|
|
|
@ -85,6 +85,13 @@ Status DeviceQueueOp::operator()() {
|
|||
|
||||
// Checks whether the buffer meets the conditions to be sent to TDT.
// Only the first row is inspected: every tensor in it must have a numeric type. An empty buffer passes.
// @param buffer the buffer to validate
// @return Status code; an error if a tensor of the first row is not numeric
Status DeviceQueueOp::CheckExceptions(const std::unique_ptr<DataBuffer> &buffer) const {
  if (buffer->NumRows() == 0) {
    return Status::OK();
  }
  TensorRow first_row;
  // NOTE(review): whatever GetRow reports back is not examined here — confirm whether it can fail.
  buffer->GetRow(0, &first_row);
  for (const auto &tensor : first_row) {
    CHECK_FAIL_RETURN_UNEXPECTED(tensor->type().IsNumeric(), "Cannot send tensor of string type to device.");
  }
  return Status::OK();
}
|
||||
|
||||
|
@ -207,7 +214,7 @@ Status DeviceQueueOp::MallocForGPUData(std::vector<device::DataItemGpu> *items,
|
|||
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "memory malloc failed.");
|
||||
}
|
||||
(void)memset_s(sub_item.data_ptr_, sub_item.data_len_, 0, sub_item.data_len_);
|
||||
unsigned char *column_data = curr_row[i]->StartAddr();
|
||||
unsigned char *column_data = curr_row[i]->GetMutableBuffer();
|
||||
if (memcpy_s(sub_item.data_ptr_, sub_item.data_len_, column_data,
|
||||
static_cast<uint32_t>(curr_row[i++]->SizeInBytes())) != 0) {
|
||||
MS_LOG(ERROR) << "memcpy_s failed!";
|
||||
|
|
|
@ -407,7 +407,7 @@ Status CelebAOp::LoadTensorRow(const std::pair<std::string, std::vector<int32_t>
|
|||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
|
||||
TensorShape(std::vector<dsize_t>(1, num_elements)),
|
||||
data_schema_->column(0).type()));
|
||||
(void)handle.read(reinterpret_cast<char *>(image->StartAddr()), num_elements);
|
||||
(void)handle.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
|
||||
if (decode_ == true) {
|
||||
Status rc = Decode(image, &image);
|
||||
if (rc.IsError()) {
|
||||
|
|
|
@ -197,7 +197,7 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) {
|
|||
std::shared_ptr<Tensor> fine_label;
|
||||
std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first;
|
||||
std::shared_ptr<Tensor> copy_image =
|
||||
std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->StartAddr());
|
||||
std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->GetMutableBuffer());
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
|
||||
data_schema_->column(1).type(),
|
||||
reinterpret_cast<unsigned char *>(&cifar_image_label_pairs_[index].second[0])));
|
||||
|
@ -394,7 +394,7 @@ Status CifarOp::ParseCifarData() {
|
|||
data_schema_->column(0).type()));
|
||||
for (int ch = 0; ch < kCifarImageChannel; ++ch) {
|
||||
for (int pix = 0; pix < kCifarImageHeight * kCifarImageWidth; ++pix) {
|
||||
(image_tensor->StartAddr())[pix * kCifarImageChannel + ch] = block[cur_block_index++];
|
||||
(image_tensor->GetMutableBuffer())[pix * kCifarImageChannel + ch] = block[cur_block_index++];
|
||||
}
|
||||
}
|
||||
cifar_image_label_pairs_.emplace_back(std::make_pair(image_tensor, labels));
|
||||
|
|
|
@ -216,7 +216,7 @@ Status ImageFolderOp::LoadTensorRow(ImageLabelPair pairPtr, TensorRow *trow) {
|
|||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
|
||||
TensorShape(std::vector<dsize_t>(1, num_elements)),
|
||||
data_schema_->column(0).type(), nullptr));
|
||||
(void)fs.read(reinterpret_cast<char *>(image->StartAddr()), num_elements);
|
||||
(void)fs.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
|
||||
fs.close();
|
||||
if (decode_ == true) {
|
||||
Status rc = Decode(image, &image);
|
||||
|
|
|
@ -210,7 +210,7 @@ Status ManifestOp::LoadTensorRow(const std::pair<std::string, std::vector<std::s
|
|||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(),
|
||||
TensorShape(std::vector<dsize_t>(1, num_elements)),
|
||||
data_schema_->column(0).type(), nullptr));
|
||||
(void)fs.read(reinterpret_cast<char *>(image->StartAddr()), num_elements);
|
||||
(void)fs.read(reinterpret_cast<char *>(image->GetMutableBuffer()), num_elements);
|
||||
if (fs.fail()) {
|
||||
fs.close();
|
||||
RETURN_STATUS_UNEXPECTED("Fail to read file: " + data.first);
|
||||
|
|
|
@ -170,7 +170,7 @@ Status MnistOp::LoadTensorRow(const MnistLabelPair &mnist_pair, TensorRow *trow)
|
|||
int32_t l = mnist_pair.second;
|
||||
// make a copy of cached tensor
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), mnist_pair.first->shape(),
|
||||
mnist_pair.first->type(), mnist_pair.first->StartAddr()));
|
||||
mnist_pair.first->type(), mnist_pair.first->GetMutableBuffer()));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
|
||||
data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&l)));
|
||||
(*trow) = {std::move(image), std::move(label)};
|
||||
|
|
|
@ -127,7 +127,7 @@ Status RandomDataOp::GenerateSchema() {
|
|||
// For each column:
|
||||
// - choose a datatype
|
||||
// - generate a shape that randomly chooses the number of dimensions and the dimension values.
|
||||
DataType::Type newType = static_cast<DataType::Type>(GenRandomInt(0, kMaxDataType));
|
||||
DataType::Type newType = static_cast<DataType::Type>(GenRandomInt(0, DataType::NUM_OF_TYPES - 2));
|
||||
int32_t rank = GenRandomInt(1, kMaxRank);
|
||||
std::vector<dsize_t> dims;
|
||||
for (int32_t d = 0; d < rank; d++) {
|
||||
|
|
|
@ -43,7 +43,6 @@ class RandomDataOp : public ParallelOp {
|
|||
static constexpr int32_t kMaxNumColumns = 4;
|
||||
static constexpr int32_t kMaxRank = 4;
|
||||
static constexpr int32_t kMaxDimValue = 2048;
|
||||
static constexpr int32_t kMaxDataType = (DataType::DE_UNKNOWN - 1);
|
||||
static constexpr int32_t kMaxTotalRows = 1024;
|
||||
|
||||
// A nested builder class to aid in the construction of a RandomDataOp
|
||||
|
|
|
@ -58,7 +58,7 @@ Status DistributedSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer
|
|||
(*out_buffer) = std::make_unique<DataBuffer>(cnt_, DataBuffer::kDeBFlagNone);
|
||||
std::shared_ptr<Tensor> sample_ids;
|
||||
RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, samples_per_buffer_));
|
||||
int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->StartAddr());
|
||||
int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->GetMutableBuffer());
|
||||
while (cnt_ < samples_per_buffer_) {
|
||||
int64_t next_id = (num_devices_ * (cnt_++) + device_id_) % num_rows_;
|
||||
*(id_ptr++) = shuffle_ ? shuffle_vec_[static_cast<size_t>(next_id)] : next_id;
|
||||
|
|
|
@ -58,7 +58,7 @@ Status PKSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) {
|
|||
int64_t last_id =
|
||||
(samples_per_buffer_ + next_id_ > num_pk_samples_) ? num_pk_samples_ : samples_per_buffer_ + next_id_;
|
||||
RETURN_IF_NOT_OK(CreateSamplerTensor(&sample_ids, last_id - next_id_));
|
||||
int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->StartAddr());
|
||||
int64_t *id_ptr = reinterpret_cast<int64_t *>(sample_ids->GetMutableBuffer());
|
||||
while (next_id_ < last_id) {
|
||||
int64_t cls_id = next_id_++ / samples_per_class_;
|
||||
const std::vector<int64_t> &samples = label_to_ids_[labels_[cls_id]];
|
||||
|
|
|
@ -38,7 +38,7 @@ Status RandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer) {
|
|||
std::shared_ptr<Tensor> sampleIds;
|
||||
int64_t last_id = samples_per_buffer_ + next_id_ > num_samples_ ? num_samples_ : samples_per_buffer_ + next_id_;
|
||||
RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, last_id - next_id_));
|
||||
int64_t *id_ptr = reinterpret_cast<int64_t *>(sampleIds->StartAddr());
|
||||
int64_t *id_ptr = reinterpret_cast<int64_t *>(sampleIds->GetMutableBuffer());
|
||||
for (int64_t i = 0; i < (last_id - next_id_); i++) {
|
||||
*(id_ptr + i) = replacement_ ? (*dist)(rnd_) : shuffled_ids_[static_cast<size_t>(i + next_id_)];
|
||||
}
|
||||
|
|
|
@ -40,7 +40,7 @@ Status Sampler::CreateSamplerTensor(std::shared_ptr<Tensor> *sample_ids, int64_t
|
|||
}
|
||||
TensorShape shape(std::vector<dsize_t>(1, num_elements));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, col_desc_->tensorImpl(), shape, col_desc_->type()));
|
||||
(void)(*sample_ids)->StartAddr(); // allocate memory in case user forgets!
|
||||
(void)(*sample_ids)->GetMutableBuffer(); // allocate memory in case user forgets!
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ Status SequentialSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffer)
|
|||
std::shared_ptr<Tensor> sampleIds;
|
||||
int64_t lastId = (samples_per_buffer_ + next_id_ > num_samples_) ? num_samples_ : samples_per_buffer_ + next_id_;
|
||||
RETURN_IF_NOT_OK(CreateSamplerTensor(&sampleIds, lastId - next_id_));
|
||||
int64_t *idPtr = reinterpret_cast<int64_t *>(sampleIds->StartAddr());
|
||||
int64_t *idPtr = reinterpret_cast<int64_t *>(sampleIds->GetMutableBuffer());
|
||||
while (next_id_ < lastId) {
|
||||
*(idPtr++) = next_id_++;
|
||||
}
|
||||
|
|
|
@ -78,7 +78,7 @@ Status SubsetRandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buffe
|
|||
RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_));
|
||||
|
||||
// Initialize tensor
|
||||
int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->StartAddr());
|
||||
int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->GetMutableBuffer());
|
||||
while (sample_id_ < last_id) {
|
||||
if (indices_[sample_id_] >= num_rows_) {
|
||||
std::string err_msg =
|
||||
|
|
|
@ -111,7 +111,7 @@ Status WeightedRandomSampler::GetNextBuffer(std::unique_ptr<DataBuffer> *out_buf
|
|||
RETURN_IF_NOT_OK(CreateSamplerTensor(&outputIds, last_id - sample_id_));
|
||||
|
||||
// Initialize tensor.
|
||||
int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->StartAddr());
|
||||
int64_t *id_ptr = reinterpret_cast<int64_t *>(outputIds->GetMutableBuffer());
|
||||
// Assign the data to tensor element.
|
||||
while (sample_id_ < last_id) {
|
||||
int64_t genId;
|
||||
|
|
|
@ -146,10 +146,7 @@ Status TextFileOp::LoadTensor(const std::string &line, std::unique_ptr<TensorQTa
|
|||
(*tensor_table)->push_back(std::move(tRow));
|
||||
|
||||
std::shared_ptr<Tensor> tensor;
|
||||
RETURN_IF_NOT_OK(
|
||||
Tensor::CreateTensor(&tensor, data_schema_->column(0).tensorImpl(),
|
||||
TensorShape(std::vector<dsize_t>(1, line.size())), data_schema_->column(0).type(),
|
||||
const_cast<unsigned char *>(reinterpret_cast<const unsigned char *>(common::SafeCStr(line)))));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, {line}, TensorShape::CreateScalar()));
|
||||
(**tensor_table)[row][0] = std::move(tensor);
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -759,7 +759,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng
|
|||
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type()));
|
||||
|
||||
// Tensors are lazily allocated, this eagerly allocates memory for the tensor.
|
||||
unsigned char *current_tensor_addr = (*tensor)->StartAddr();
|
||||
unsigned char *current_tensor_addr = (*tensor)->GetMutableBuffer();
|
||||
int64_t tensor_bytes_remaining = (*num_elements) * pad_size;
|
||||
|
||||
if (current_tensor_addr == nullptr) {
|
||||
|
@ -878,7 +878,7 @@ Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengin
|
|||
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type()));
|
||||
|
||||
// Tensors are lazily allocated, this eagerly allocates memory for the tensor.
|
||||
(void)(*tensor)->StartAddr();
|
||||
(void)(*tensor)->GetMutableBuffer();
|
||||
|
||||
int64_t i = 0;
|
||||
auto it = (*tensor)->begin<T>();
|
||||
|
|
|
@ -388,7 +388,7 @@ Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &co
|
|||
(void)fs.seekg(0, std::ios::beg);
|
||||
RETURN_IF_NOT_OK(
|
||||
Tensor::CreateTensor(tensor, col.tensorImpl(), TensorShape(std::vector<dsize_t>(1, num_elements)), col.type()));
|
||||
(void)fs.read(reinterpret_cast<char *>((*tensor)->StartAddr()), num_elements);
|
||||
(void)fs.read(reinterpret_cast<char *>((*tensor)->GetMutableBuffer()), num_elements);
|
||||
fs.close();
|
||||
if (decode_ == true) {
|
||||
Status rc = Decode(*tensor, tensor);
|
||||
|
|
|
@ -110,7 +110,7 @@ TdtStatus TdtPlugin::translate(const TensorRow &ts_row, std::vector<DataItem> &i
|
|||
data_item.tensorShape_ = dataShapes;
|
||||
data_item.tensorType_ = datatype;
|
||||
data_item.dataLen_ = ts->SizeInBytes();
|
||||
data_item.dataPtr_ = std::shared_ptr<void>(reinterpret_cast<void *>(ts->StartAddr()), [](void *elem) {});
|
||||
data_item.dataPtr_ = std::shared_ptr<void>(reinterpret_cast<void *>(ts->GetMutableBuffer()), [](void *elem) {});
|
||||
items.emplace_back(data_item);
|
||||
MS_LOG(INFO) << "TDT data type is " << datatype << ", data shape is " << dataShapes << ", data length is "
|
||||
<< ts->Size() << ".";
|
||||
|
|
|
@ -162,7 +162,7 @@ void CastFrom(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
|
|||
Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type) {
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), data_type));
|
||||
|
||||
static_cast<void>((*output)->StartAddr());
|
||||
static_cast<void>((*output)->GetMutableBuffer());
|
||||
switch (input->type().value()) {
|
||||
case DataType::DE_BOOL:
|
||||
CastFrom<bool>(input, output);
|
||||
|
@ -211,7 +211,7 @@ Status ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *
|
|||
// initiate new tensor for type cast
|
||||
DataType new_type = DataType("float16");
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), new_type));
|
||||
static_cast<void>((*output)->StartAddr());
|
||||
static_cast<void>((*output)->GetMutableBuffer());
|
||||
|
||||
auto in_itr = input->begin<float>();
|
||||
auto out_itr = (*output)->begin<float16>();
|
||||
|
|
|
@ -64,7 +64,7 @@ Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int
|
|||
|
||||
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
|
||||
RETURN_UNEXPECTED_IF_NULL(output_cv);
|
||||
(void)output_cv->StartAddr();
|
||||
(void)output_cv->GetMutableBuffer();
|
||||
if (input_cv->mat().data) {
|
||||
try {
|
||||
cv::flip(input_cv->mat(), output_cv->mat(), flip_code);
|
||||
|
@ -125,10 +125,10 @@ bool HasJpegMagic(const unsigned char *data, size_t data_size) {
|
|||
}
|
||||
|
||||
Status Decode(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
|
||||
if (input->StartAddr() == nullptr) {
|
||||
if (input->GetMutableBuffer() == nullptr) {
|
||||
RETURN_STATUS_UNEXPECTED("Tensor is nullptr");
|
||||
}
|
||||
if (HasJpegMagic(input->StartAddr(), input->SizeInBytes())) {
|
||||
if (HasJpegMagic(input->GetMutableBuffer(), input->SizeInBytes())) {
|
||||
return JpegCropAndDecode(input, output);
|
||||
} else {
|
||||
return DecodeCv(input, output);
|
||||
|
@ -282,7 +282,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
|
|||
jerr.pub.error_exit = JpegErrorExitCustom;
|
||||
try {
|
||||
jpeg_create_decompress(&cinfo);
|
||||
JpegSetSource(&cinfo, input->StartAddr(), input->SizeInBytes());
|
||||
JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes());
|
||||
(void)jpeg_read_header(&cinfo, TRUE);
|
||||
RETURN_IF_NOT_OK(JpegSetColorSpace(&cinfo));
|
||||
jpeg_calc_output_dimensions(&cinfo);
|
||||
|
@ -311,7 +311,7 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
|
|||
TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents});
|
||||
auto output_tensor = std::make_shared<Tensor>(ts, DataType(DataType::DE_UINT8));
|
||||
const int buffer_size = output_tensor->SizeInBytes();
|
||||
JSAMPLE *buffer = static_cast<JSAMPLE *>(output_tensor->StartAddr());
|
||||
JSAMPLE *buffer = static_cast<JSAMPLE *>(output_tensor->GetMutableBuffer());
|
||||
const int max_scanlines_to_read = skipped_scanlines + crop_h;
|
||||
// stride refers to output tensor, which has 3 components at most
|
||||
const int stride = crop_w * kOutNumComponents;
|
||||
|
|
|
@ -31,7 +31,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr<Tensor> &input, s
|
|||
if (input == nullptr) {
|
||||
RETURN_STATUS_UNEXPECTED("input tensor is null");
|
||||
}
|
||||
if (!HasJpegMagic(input->StartAddr(), input->SizeInBytes())) {
|
||||
if (!HasJpegMagic(input->GetMutableBuffer(), input->SizeInBytes())) {
|
||||
DecodeOp op(true);
|
||||
std::shared_ptr<Tensor> decoded;
|
||||
RETURN_IF_NOT_OK(op.Compute(input, &decoded));
|
||||
|
@ -43,7 +43,7 @@ Status RandomCropDecodeResizeOp::Compute(const std::shared_ptr<Tensor> &input, s
|
|||
jerr.pub.error_exit = JpegErrorExitCustom;
|
||||
try {
|
||||
jpeg_create_decompress(&cinfo);
|
||||
JpegSetSource(&cinfo, input->StartAddr(), input->SizeInBytes());
|
||||
JpegSetSource(&cinfo, input->GetMutableBuffer(), input->SizeInBytes());
|
||||
(void)jpeg_read_header(&cinfo, TRUE);
|
||||
jpeg_calc_output_dimensions(&cinfo);
|
||||
} catch (std::runtime_error &e) {
|
||||
|
|
|
@ -50,6 +50,7 @@ SET(DE_UT_SRCS
|
|||
storage_op_test.cc
|
||||
task_manager_test.cc
|
||||
tensor_test.cc
|
||||
tensor_string_test.cc
|
||||
tensorshape_test.cc
|
||||
tfReader_op_test.cc
|
||||
to_float16_op_test.cc
|
||||
|
|
|
@ -60,7 +60,7 @@ void CVOpCommon::GetInputImage(std::string filename) {
|
|||
TensorShape in_shape({file_size});
|
||||
raw_input_tensor_ = std::make_shared<Tensor>(in_shape, DataType(DataType::DE_UINT8));
|
||||
|
||||
file.read(reinterpret_cast<char *>(raw_input_tensor_->StartAddr()), raw_input_tensor_->SizeInBytes());
|
||||
file.read(reinterpret_cast<char *>(raw_input_tensor_->GetMutableBuffer()), raw_input_tensor_->SizeInBytes());
|
||||
raw_cv_image_ = cv::imread(filename, cv::ImreadModes::IMREAD_COLOR);
|
||||
input_tensor_ = std::dynamic_pointer_cast<Tensor>(std::make_shared<CVTensor>(raw_cv_image_));
|
||||
SwapRedAndBlue(input_tensor_, &input_tensor_);
|
||||
|
|
|
@ -32,47 +32,47 @@ class MindDataTestDatatype : public UT::Common {
|
|||
|
||||
|
||||
TEST_F(MindDataTestDatatype, TestSizes) {
|
||||
uint8_t x = DataType::DE_BOOL_SIZE;
|
||||
uint8_t x = DataType::SIZE_IN_BYTES[DataType::DE_BOOL];
|
||||
DataType d = DataType(DataType::DE_BOOL);
|
||||
ASSERT_EQ(x, 1);
|
||||
ASSERT_EQ(d.SizeInBytes(), x);
|
||||
x = DataType::DE_INT8_SIZE;
|
||||
x = DataType::SIZE_IN_BYTES[DataType::DE_INT8];
|
||||
d = DataType(DataType::DE_INT8);
|
||||
ASSERT_EQ(x, 1);
|
||||
ASSERT_EQ(d.SizeInBytes(), x);
|
||||
x = DataType::DE_UINT8_SIZE;
|
||||
x = DataType::SIZE_IN_BYTES[DataType::DE_UINT8];
|
||||
d = DataType(DataType::DE_UINT8);
|
||||
ASSERT_EQ(x, 1);
|
||||
ASSERT_EQ(d.SizeInBytes(), x);
|
||||
x = DataType::DE_INT16_SIZE;
|
||||
x = DataType::SIZE_IN_BYTES[DataType::DE_INT16];
|
||||
d = DataType(DataType::DE_INT16);
|
||||
ASSERT_EQ(x, 2);
|
||||
ASSERT_EQ(d.SizeInBytes(), x);
|
||||
x = DataType::DE_UINT16_SIZE;
|
||||
x = DataType::SIZE_IN_BYTES[DataType::DE_UINT16];
|
||||
d = DataType(DataType::DE_UINT16);
|
||||
ASSERT_EQ(x, 2);
|
||||
ASSERT_EQ(d.SizeInBytes(), x);
|
||||
x = DataType::DE_INT32_SIZE;
|
||||
x = DataType::SIZE_IN_BYTES[DataType::DE_INT32];
|
||||
d = DataType(DataType::DE_INT32);
|
||||
ASSERT_EQ(x, 4);
|
||||
ASSERT_EQ(d.SizeInBytes(), x);
|
||||
x = DataType::DE_UINT32_SIZE;
|
||||
x = DataType::SIZE_IN_BYTES[DataType::DE_UINT32];
|
||||
d = DataType(DataType::DE_UINT32);
|
||||
ASSERT_EQ(x, 4);
|
||||
ASSERT_EQ(d.SizeInBytes(), x);
|
||||
x = DataType::DE_INT64_SIZE;
|
||||
x = DataType::SIZE_IN_BYTES[DataType::DE_INT64];
|
||||
d = DataType(DataType::DE_INT64);
|
||||
ASSERT_EQ(x, 8);
|
||||
ASSERT_EQ(d.SizeInBytes(), x);
|
||||
x = DataType::DE_UINT64_SIZE;
|
||||
x = DataType::SIZE_IN_BYTES[DataType::DE_UINT64];
|
||||
d = DataType(DataType::DE_UINT64);
|
||||
ASSERT_EQ(x, 8);
|
||||
ASSERT_EQ(d.SizeInBytes(), x);
|
||||
x = DataType::DE_FLOAT32_SIZE;
|
||||
x = DataType::SIZE_IN_BYTES[DataType::DE_FLOAT32];
|
||||
d = DataType(DataType::DE_FLOAT32);
|
||||
ASSERT_EQ(x, 4);
|
||||
ASSERT_EQ(d.SizeInBytes(), x);
|
||||
x = DataType::DE_FLOAT64_SIZE;
|
||||
x = DataType::SIZE_IN_BYTES[DataType::DE_FLOAT64];
|
||||
d = DataType(DataType::DE_FLOAT64);
|
||||
ASSERT_EQ(x, 8);
|
||||
ASSERT_EQ(d.SizeInBytes(), x);
|
||||
|
|
|
@ -74,7 +74,7 @@ Status Create1DTensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements,
|
|||
RETURN_IF_NOT_OK(
|
||||
Tensor::CreateTensor(sample_ids, TensorImpl::kFlexible, shape, DataType(data_type), data));
|
||||
if (data == nullptr) {
|
||||
(*sample_ids)->StartAddr(); // allocate memory in case user forgets!
|
||||
(*sample_ids)->GetMutableBuffer(); // allocate memory in case user forgets!
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -190,7 +190,7 @@ TEST_F(MindDataTestMapOp, TestByPosition) {
|
|||
EXPECT_EQ(tensor_list[i]->type(), golden_types[i]);
|
||||
EXPECT_EQ(tensor_list[i]->Rank(), golden_ranks[i]);
|
||||
EXPECT_EQ(tensor_list[i]->shape(), golden_shapes[i]);
|
||||
EXPECT_NE(tensor_list[i]->StartAddr(), nullptr);
|
||||
EXPECT_NE(tensor_list[i]->GetMutableBuffer(), nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -366,7 +366,7 @@ TEST_F(MindDataTestMapOp, Test1to3) {
|
|||
EXPECT_EQ(tensor_list[i]->type(), golden_types[i]);
|
||||
EXPECT_EQ(tensor_list[i]->Rank(), golden_ranks[i]);
|
||||
EXPECT_EQ(tensor_list[i]->shape(), golden_shapes[i]);
|
||||
EXPECT_NE(tensor_list[i]->StartAddr(), nullptr);
|
||||
EXPECT_NE(tensor_list[i]->GetMutableBuffer(), nullptr);
|
||||
}
|
||||
rc = di.FetchNextTensorRow(&tensor_list);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
@ -700,7 +700,7 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) {
|
|||
MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n";
|
||||
EXPECT_TRUE(img_class[(i % 44) / 11] == label);
|
||||
// Dump all the image into string, to be used as a comparison later.
|
||||
result.append((char *) tensor_map["image"]->StartAddr(), (int64_t) tensor_map["image"]->Size());
|
||||
result.append((char *)tensor_map["image"]->GetMutableBuffer(), (int64_t) tensor_map["image"]->Size());
|
||||
di.GetNextAsMap(&tensor_map);
|
||||
i++;
|
||||
}
|
||||
|
@ -745,7 +745,7 @@ TEST_F(MindDataTestMapOp, ImageFolder_Decode_Repeat_Resize) {
|
|||
tensor_map["label"]->GetItemAt<int32_t>(&label, {});
|
||||
MS_LOG(DEBUG) << "row:" << i << "\tlabel:" << label << "\n";
|
||||
EXPECT_TRUE(img_class[(i % 44) / 11] == label);
|
||||
result2.append((char *) tensor_map["image"]->StartAddr(), (int64_t) tensor_map["image"]->Size());
|
||||
result2.append((char *)tensor_map["image"]->GetMutableBuffer(), (int64_t) tensor_map["image"]->Size());
|
||||
di2.GetNextAsMap(&tensor_map);
|
||||
i++;
|
||||
}
|
||||
|
|
|
@ -57,8 +57,8 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp2) {
|
|||
for (int i = 0; i < 100; i++) {
|
||||
(void)crop_and_decode.Compute(raw_input_tensor_, &crop_and_decode_output);
|
||||
(void)decode_and_crop.Compute(input_tensor_, &decode_and_crop_output);
|
||||
cv::Mat output1(target_height, target_width, CV_8UC3, crop_and_decode_output->StartAddr());
|
||||
cv::Mat output2(target_height, target_width, CV_8UC3, decode_and_crop_output->StartAddr());
|
||||
cv::Mat output1(target_height, target_width, CV_8UC3, crop_and_decode_output->GetMutableBuffer());
|
||||
cv::Mat output2(target_height, target_width, CV_8UC3, decode_and_crop_output->GetMutableBuffer());
|
||||
long int mse_sum = 0;
|
||||
long int count = 0;
|
||||
int a, b;
|
||||
|
@ -133,8 +133,8 @@ TEST_F(MindDataTestRandomCropDecodeResizeOp, TestOp1) {
|
|||
crop_and_decode_status = Crop(decoded, &decoded_and_cropped, x, y, crop_width, crop_height);
|
||||
decode_and_crop_status = JpegCropAndDecode(raw_input_tensor_, &cropped_and_decoded, x, y, crop_width, crop_height);
|
||||
{
|
||||
cv::Mat M1(crop_height, crop_width, CV_8UC3, decoded_and_cropped->StartAddr());
|
||||
cv::Mat M2(crop_height, crop_width, CV_8UC3, cropped_and_decoded->StartAddr());
|
||||
cv::Mat M1(crop_height, crop_width, CV_8UC3, decoded_and_cropped->GetMutableBuffer());
|
||||
cv::Mat M2(crop_height, crop_width, CV_8UC3, cropped_and_decoded->GetMutableBuffer());
|
||||
for (int i = 0; i < crop_height; ++i) {
|
||||
for (int j = 0; j < crop_width; ++j) {
|
||||
m1 = M1.at<cv::Vec3b>(i, j)[1];
|
||||
|
|
|
@ -34,7 +34,7 @@ Status CreateINT64Tensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elemen
|
|||
RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, TensorImpl::kFlexible, shape,
|
||||
DataType(DataType::DE_INT64), data));
|
||||
if (data == nullptr) {
|
||||
(*sample_ids)->StartAddr(); // allocate memory in case user forgets!
|
||||
(*sample_ids)->GetMutableBuffer(); // allocate memory in case user forgets!
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -0,0 +1,153 @@
|
|||
/**
|
||||
* Copyright 2019 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "dataset/core/client.h"
|
||||
#include "common/common.h"
|
||||
#include "gtest/gtest.h"
|
||||
#include "securec.h"
|
||||
#include "dataset/core/tensor.h"
|
||||
#include "dataset/core/cv_tensor.h"
|
||||
#include "dataset/core/data_type.h"
|
||||
#include "dataset/util/de_error.h"
|
||||
|
||||
using namespace mindspore::dataset;
|
||||
|
||||
namespace py = pybind11;
|
||||
|
||||
class MindDataTestStringTensorDE : public UT::Common {
|
||||
public:
|
||||
MindDataTestStringTensorDE() = default;
|
||||
|
||||
void SetUp() override { GlobalInit(); }
|
||||
};
|
||||
|
||||
// Checks construction of string tensors (scalar, 1-D and 2-D) and element
// read-back through GetItemAt. Every GetItemAt status is asserted so that a
// failed lookup is reported at the call site rather than as a value mismatch.
TEST_F(MindDataTestStringTensorDE, Basics) {
  // Scalar tensor built from a single string: expected to have an empty shape.
  std::shared_ptr<Tensor> t = std::make_shared<Tensor>("Hi");
  ASSERT_TRUE(t->shape() == TensorShape({}));
  std::string_view s = "";
  ASSERT_TRUE(t->GetItemAt(&s, {}).IsOk());
  ASSERT_TRUE(s == "Hi");

  // Tensor built from a vector without an explicit shape: expected shape is {2}.
  std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(std::vector<std::string>{"Hi", "Bye"});
  ASSERT_TRUE(t2->shape() == TensorShape({2}));
  ASSERT_TRUE(t2->GetItemAt(&s, {0}).IsOk());
  ASSERT_TRUE(s == "Hi");
  ASSERT_TRUE(t2->GetItemAt(&s, {1}).IsOk());
  ASSERT_TRUE(s == "Bye");

  // Six strings laid out as a 2x3 tensor; elements must come back in the
  // order they were supplied when indexed as {row, column}.
  std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"};
  std::shared_ptr<Tensor> t3 = std::make_shared<Tensor>(strings, TensorShape({2, 3}));
  ASSERT_TRUE(t3->shape() == TensorShape({2, 3}));
  uint32_t index = 0;
  for (uint32_t i = 0; i < 2; i++) {
    for (uint32_t j = 0; j < 3; j++) {
      // Separate name so the loop variable does not shadow `s` above.
      std::string_view item = "";
      ASSERT_TRUE(t3->GetItemAt(&item, {i, j}).IsOk());
      ASSERT_TRUE(item == strings[index++]);
    }
  }
}
|
||||
|
||||
// Checks the raw buffer of a 2x3 string tensor against the values this test
// expects: six 32-bit offsets followed by the character data.
TEST_F(MindDataTestStringTensorDE, Basics2) {
  std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"};
  std::shared_ptr<Tensor> t = std::make_shared<Tensor>(strings, TensorShape({2, 3}));
  // 6 strings * (4-byte offset + 1 extra byte per string) + 20 characters in total.
  ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 20);

  // Fetch the buffer once; every check below reads from the same allocation.
  unsigned char *base = t->GetMutableBuffer();
  ASSERT_TRUE(base != nullptr);

  // Expected offset table: the position just past the last character of each
  // string, counted from the start of the character area.
  std::vector<uint32_t> offsets = {3, 8, 11, 17, 21, 25};
  uint32_t ctr = 0;
  for (auto i : offsets) {
    ASSERT_TRUE(*(reinterpret_cast<uint32_t *>(base + ctr)) == i);
    ctr += 4;
  }

  // The character area begins right after the six 4-byte offsets.
  const char *buf = reinterpret_cast<char *>(base) + 6 * 4;
  std::vector<uint32_t> starts = {0, 4, 9, 12, 18, 22};

  uint32_t index = 0;
  for (uint32_t i = 0; i < 2; i++) {
    for (uint32_t j = 0; j < 3; j++) {
      std::string_view s = "";
      ASSERT_TRUE(t->GetItemAt(&s, {i, j}).IsOk());
      // The returned view must point into the tensor's own buffer (no copy) ...
      ASSERT_TRUE(s.data() == buf + starts[index]);
      // ... and must hold the string that was supplied for this position.
      ASSERT_TRUE(s == strings[index++]);
    }
  }
}
|
||||
|
||||
// Same layout checks as Basics2, but with empty strings mixed in, to verify
// that zero-length elements still get their own offset entry and position.
TEST_F(MindDataTestStringTensorDE, Empty) {
  std::vector<std::string> strings{"abc", "defg", "", "", "123", ""};
  std::shared_ptr<Tensor> t = std::make_shared<Tensor>(strings, TensorShape({2, 3}));
  // Expected character area ('_' marks the extra byte after each string):
  //   abc_defg___123__
  //   0123456789012345
  // 6 strings * (4-byte offset + 1 extra byte per string) + 10 characters in total.
  ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 10);

  // Fetch the buffer once; every check below reads from the same allocation.
  unsigned char *base = t->GetMutableBuffer();
  ASSERT_TRUE(base != nullptr);

  // Expected offset table: the position just past the last character of each
  // string; an empty string's offset equals its own start position.
  std::vector<uint32_t> offsets = {3, 8, 9, 10, 14, 15};
  uint32_t ctr = 0;
  for (auto i : offsets) {
    ASSERT_TRUE(*(reinterpret_cast<uint32_t *>(base + ctr)) == i);
    ctr += 4;
  }

  // The character area begins right after the six 4-byte offsets.
  const char *buf = reinterpret_cast<char *>(base) + 6 * 4;
  std::vector<uint32_t> starts = {0, 4, 9, 10, 11, 15};

  uint32_t index = 0;
  for (uint32_t i = 0; i < 2; i++) {
    for (uint32_t j = 0; j < 3; j++) {
      std::string_view s = "";
      ASSERT_TRUE(t->GetItemAt(&s, {i, j}).IsOk());
      ASSERT_TRUE(s.data() == buf + starts[index]);
      ASSERT_TRUE(s == strings[index++]);
    }
  }
}
|
||||
|
||||
// Overwrites three elements of a 2x3 string tensor and verifies that every
// element (modified or not) reads back as expected. The status of each
// SetItemAt / GetItemAt call is asserted so a rejected write fails the test
// at the write itself.
TEST_F(MindDataTestStringTensorDE, SetItem) {
  std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"};
  std::shared_ptr<Tensor> t3 = std::make_shared<Tensor>(strings, TensorShape({2, 3}));
  ASSERT_TRUE(t3->shape() == TensorShape({2, 3}));

  // Each replacement below has the same length as the string it overwrites.
  // NOTE(review): presumably SetItemAt requires equal lengths for string tensors - confirm.
  ASSERT_TRUE(t3->SetItemAt({0, 1}, std::string{"xyzz"}).IsOk());
  strings[1] = "xyzz";

  ASSERT_TRUE(t3->SetItemAt({0, 2}, std::string{"07"}).IsOk());
  strings[2] = "07";

  ASSERT_TRUE(t3->SetItemAt({1, 2}, std::string{"987"}).IsOk());
  strings[5] = "987";

  // `strings` now mirrors the expected tensor contents in row-major order.
  uint32_t index = 0;
  for (uint32_t i = 0; i < 2; i++) {
    for (uint32_t j = 0; j < 3; j++) {
      std::string_view s = "";
      ASSERT_TRUE(t3->GetItemAt(&s, {i, j}).IsOk());
      ASSERT_TRUE(s == strings[index++]);
    }
  }
}
|
||||
|
||||
// Walks a 2x3 string tensor with its string_view iterator, first one element
// at a time and then two at a time, comparing against the source strings.
TEST_F(MindDataTestStringTensorDE, Iterator) {
  std::vector<std::string> expected{"abc", "defg", "hi", "klmno", "123", "789"};
  std::shared_ptr<Tensor> t = std::make_shared<Tensor>(expected, TensorShape({2, 3}));

  // Pass 1: unit stride, every element is visited.
  uint32_t pos = 0;
  auto cursor = t->begin<std::string_view>();
  while (cursor != t->end<std::string_view>()) {
    ASSERT_TRUE(*cursor == expected[pos]);
    pos++;
    cursor++;
  }

  // Pass 2: stride of two, only the even-numbered elements are visited.
  pos = 0;
  cursor = t->begin<std::string_view>();
  while (cursor != t->end<std::string_view>()) {
    ASSERT_TRUE(*cursor == expected[pos]);
    pos += 2;
    cursor += 2;
  }
}
|
|
@ -111,17 +111,17 @@ TEST_F(MindDataTestTensorDE, CopyTensor) {
|
|||
int16_t o;
|
||||
t->GetItemAt<int16_t>(&o, {});
|
||||
ASSERT_EQ(o, -66);
|
||||
unsigned char *addr = t->StartAddr();
|
||||
unsigned char *addr = t->GetMutableBuffer();
|
||||
auto t2 = std::make_shared<Tensor>(std::move(*t));
|
||||
ASSERT_EQ(t2->shape(), TensorShape({}));
|
||||
ASSERT_EQ(t2->type(), DataType::DE_INT16);
|
||||
t2->GetItemAt<int16_t>(&o, {});
|
||||
ASSERT_EQ(o, -66);
|
||||
unsigned char *new_addr = t2->StartAddr();
|
||||
unsigned char *new_addr = t2->GetMutableBuffer();
|
||||
ASSERT_EQ(addr, new_addr);
|
||||
ASSERT_EQ(t->shape(), TensorShape::CreateUnknownRankShape());
|
||||
ASSERT_EQ(t->type(), DataType::DE_UNKNOWN);
|
||||
ASSERT_EQ(t->StartAddr(), nullptr);
|
||||
ASSERT_EQ(t->GetMutableBuffer(), nullptr);
|
||||
Status rc = t->GetItemAt<int16_t>(&o, {});
|
||||
ASSERT_TRUE(rc.IsError());
|
||||
}
|
||||
|
@ -237,7 +237,7 @@ TEST_F(MindDataTestTensorDE, Strides) {
|
|||
void checkCvMat(TensorShape shape, DataType type) {
|
||||
std::shared_ptr<CVTensor> t = std::make_shared<CVTensor>(shape, type);
|
||||
cv::Mat m = t->mat();
|
||||
ASSERT_EQ(m.data, t->StartAddr());
|
||||
ASSERT_EQ(m.data, t->GetMutableBuffer());
|
||||
ASSERT_EQ(static_cast<uchar>(m.type()) & static_cast<uchar>(CV_MAT_DEPTH_MASK), type.AsCVType());
|
||||
if (shape.Rank() < 4) {
|
||||
if (shape.Rank() > 1) {
|
||||
|
@ -311,15 +311,15 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) {
|
|||
TEST_F(MindDataTestTensorDE, CVTensorAs) {
|
||||
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64));
|
||||
t->Fill<double>(2.2);
|
||||
unsigned char *addr = t->StartAddr();
|
||||
unsigned char *addr = t->GetMutableBuffer();
|
||||
std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64));
|
||||
t2->Fill<double>(4.4);
|
||||
std::shared_ptr<CVTensor> ctv = CVTensor::AsCVTensor(t);
|
||||
ASSERT_EQ(t->StartAddr(), nullptr);
|
||||
ASSERT_EQ(ctv->StartAddr(), addr);
|
||||
ASSERT_EQ(t->GetMutableBuffer(), nullptr);
|
||||
ASSERT_EQ(ctv->GetMutableBuffer(), addr);
|
||||
cv::Mat m = ctv->mat();
|
||||
m = 2 * m;
|
||||
ASSERT_EQ(ctv->StartAddr(), addr);
|
||||
ASSERT_EQ(ctv->GetMutableBuffer(), addr);
|
||||
ASSERT_TRUE(*t2 == *ctv);
|
||||
MS_LOG(DEBUG) << *t2 << std::endl << *ctv;
|
||||
}
|
||||
|
|
|
@ -41,8 +41,8 @@ def test_textline_dataset_totext():
|
|||
count = 0
|
||||
line = ["This is a text file.", "Another file.", "Be happy every day.", "End of file.", "Good luck to everyone."]
|
||||
for i in data.create_dict_iterator():
|
||||
str = nlp.as_text(i["text"])
|
||||
assert(str == line[count])
|
||||
str = i["text"].item().decode("utf8")
|
||||
assert(str == line[count])
|
||||
count += 1
|
||||
assert(count == 5)
|
||||
|
||||
|
@ -68,8 +68,8 @@ def test_textline_dataset_repeat():
|
|||
"This is a text file.", "Be happy every day.", "Good luck to everyone.",
|
||||
"This is a text file.", "Be happy every day.", "Good luck to everyone."]
|
||||
for i in data.create_dict_iterator():
|
||||
str = nlp.as_text(i["text"])
|
||||
assert(str == line[count])
|
||||
str = i["text"].item().decode("utf8")
|
||||
assert(str == line[count])
|
||||
count += 1
|
||||
assert(count == 9)
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@ def test_flat_map_1():
|
|||
import mindspore.dataset.transforms.nlp.utils as nlp
|
||||
|
||||
def flat_map_func(x):
|
||||
data_dir = nlp.as_text(x[0])
|
||||
data_dir = x[0].item().decode('utf8')
|
||||
d = ds.ImageFolderDatasetV2(data_dir)
|
||||
return d
|
||||
|
||||
|
@ -47,12 +47,12 @@ def test_flat_map_2():
|
|||
import mindspore.dataset.transforms.nlp.utils as nlp
|
||||
|
||||
def flat_map_func_1(x):
|
||||
data_dir = nlp.as_text(x[0])
|
||||
data_dir = x[0].item().decode('utf8')
|
||||
d = ds.ImageFolderDatasetV2(data_dir)
|
||||
return d
|
||||
|
||||
def flat_map_func_2(x):
|
||||
text_file = nlp.as_text(x[0])
|
||||
text_file = x[0].item().decode('utf8')
|
||||
d = ds.TextFileDataset(text_file)
|
||||
d = d.flat_map(flat_map_func_1)
|
||||
return d
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
# Copyright 2019 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ==============================================================================
|
||||
import mindspore._c_dataengine as cde
|
||||
import mindspore.dataset as ds
|
||||
import pytest
|
||||
|
||||
import numpy as np
|
||||
|
||||
|
||||
def test_basic():
    """
    Round-trip a 2-D numpy bytes array through cde.Tensor.

    The array is wrapped in a cde.Tensor, converted back with as_array(),
    and the result is compared element-wise with the input.
    """
    x = np.array([["ab", "cde", "121"], ["x", "km", "789"]], dtype='S')
    n = cde.Tensor(x)
    arr = n.as_array()
    # Compare the round-tripped array itself; the previous check (`all(y == y)`
    # on an unrelated array) could never fail.
    np.testing.assert_array_equal(x, arr)
|
||||
|
||||
|
||||
def compare(strings):
    """
    Feed `strings` through a GeneratorDataset and check each row comes back unchanged.

    Args:
        strings: (possibly nested) list of str, converted to a numpy array with dtype 'S'.
    """
    expected = np.array(strings, dtype='S')

    def single_row():
        # One row with a single column holding the whole array.
        yield (expected,)

    dataset = ds.GeneratorDataset(single_row, column_names=["col"])

    for row in dataset:
        np.testing.assert_array_equal(row[0], expected)
|
||||
|
||||
|
||||
def test_generator():
    """Run the generator round-trip check on a single string, a 1-D list and a 2-D list."""
    cases = (
        ["ab"],
        ["ab", "cde", "121"],
        [["ab", "cde", "121"], ["x", "km", "789"]],
    )
    for case in cases:
        compare(case)
|
||||
|
||||
|
||||
def test_batching_strings():
    """Batching a string column is expected to raise a RuntimeError with a Batch error message."""
    def gen():
        yield np.array(["ab", "cde", "121"], dtype='S'),

    data = ds.GeneratorDataset(gen, column_names=["col"]).batch(10)

    with pytest.raises(RuntimeError) as info:
        for _ in data:
            pass
    # Match against the raised exception (info.value). str(info) formats the
    # ExceptionInfo wrapper, whose text differs between pytest versions.
    assert "[Batch ERROR] Batch does not support" in str(info.value)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Allow running this file directly; the tests execute in the listed order.
    for run_test in (test_generator, test_basic, test_batching_strings):
        run_test()
|
Loading…
Reference in New Issue