!2983 Dataset Tensor class cleanup

Merge pull request !2983 from h.farahat/tensor_class
Committed by mindspore-ci-bot on 2020-07-25 20:59:13 +08:00 (via Gitee)
commit 9c06a564d1
78 changed files with 1730 additions and 1540 deletions

View File

@@ -511,8 +511,9 @@ Status DEPipeline::FetchDataFromTensorRow(const TensorRow &row,
       RETURN_IF_NOT_OK(s);
       if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
     } else if (column_type == DataType::DE_STRING) {
-      auto buffer = tensor->GetStringsBuffer();
-      std::string ss(reinterpret_cast<const char *>(buffer));  // assume scalar string tensor
+      std::string_view sv;
+      RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {0}));  // assume scalar string tensor
+      std::string ss(sv);
       (*row_raw_data)[column_name] = std::move(ss);
       continue;
     } else {
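
For reference, a minimal sketch of reading a scalar string tensor through the new accessor used above; `tensor` is assumed to be a valid DE_STRING tensor and error handling is elided:

    std::string_view sv;
    Status rc = tensor->GetItemAt(&sv, {0});  // view into the tensor's own string buffer
    if (rc.IsOk()) {
      std::string owned(sv);  // copy out before the tensor is destroyed or invalidated
    }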
@@ -1678,13 +1679,13 @@ Status DEPipeline::ParsePadInfo(py::handle value, PadInfo *pad_info) {
     if (py::isinstance<py::str>(tp[1])) {
       std::string pad_val_string = tp[1].is_none() ? "" : ToString(tp[1]);
       CHECK_FAIL_RETURN_UNEXPECTED(
-        Tensor::CreateTensor(&pad_val, std::vector<std::string>{pad_val_string}, TensorShape::CreateScalar()),
+        Tensor::CreateFromVector(std::vector<std::string>{pad_val_string}, TensorShape::CreateScalar(), &pad_val),
         "Cannot create pad_value Tensor");
     } else {
       float pad_val_float = tp[1].is_none() ? 0 : ToFloat(tp[1]);
-      CHECK_FAIL_RETURN_UNEXPECTED(Tensor::CreateTensor(&pad_val, TensorImpl::kFlexible, TensorShape::CreateScalar(),
-                                                        DataType(DataType::DE_FLOAT32)),
+      CHECK_FAIL_RETURN_UNEXPECTED(
+        Tensor::CreateEmpty(TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32), &pad_val),
         "Cannot create pad_value Tensor");
       pad_val->SetItemAt<float>({}, pad_val_float);
     }
     (void)pad_info->insert({ToString(p.first), {shape, pad_val}});
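
A sketch of the two factory calls this hunk switches to; `pad_val` is a std::shared_ptr<Tensor> and the values are illustrative:

    std::shared_ptr<Tensor> pad_val;
    // string pad value: a one-element vector reshaped to a scalar
    Tensor::CreateFromVector(std::vector<std::string>{"pad"}, TensorShape::CreateScalar(), &pad_val);
    // numeric pad value: allocate an empty scalar, then set the item
    Tensor::CreateEmpty(TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32), &pad_val);
    pad_val->SetItemAt<float>({}, 0.0f);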

View File

@@ -340,7 +340,7 @@ void bindTensor(py::module *m) {
   (void)py::class_<Tensor, std::shared_ptr<Tensor>>(*m, "Tensor", py::buffer_protocol())
     .def(py::init([](py::array arr) {
       std::shared_ptr<Tensor> out;
-      THROW_IF_ERROR(Tensor::CreateTensor(&out, arr));
+      THROW_IF_ERROR(Tensor::CreateFromNpArray(arr, &out));
       return out;
     }))
     .def_buffer([](Tensor &tensor) {
@@ -364,7 +364,18 @@ void bindTensor(py::module *m) {
     });
   (void)py::class_<TensorShape>(*m, "TensorShape")
-    .def(py::init<py::list>())
+    .def(py::init([](const py::list &list) {
+      std::vector<dsize_t> list_c;
+      for (auto &i : list) {
+        if (!i.is_none()) {
+          list_c.push_back(i.cast<int>());
+        } else {
+          list_c.push_back(TensorShape::kDimUnknown);
+        }
+      }
+      TensorShape out(list_c);
+      return out;
+    }))
     .def("__str__", &TensorShape::ToString)
     .def("as_list", &TensorShape::AsPyList)
     .def("is_known", &TensorShape::known);

View File

@@ -23,18 +23,35 @@
 namespace mindspore {
 namespace dataset {
-CVTensor::CVTensor(const TensorShape &shape, const DataType &type) : Tensor(shape, type) {
-  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
-}
-
-CVTensor::CVTensor(const TensorShape &shape, const DataType &type, const uchar *data) : Tensor(shape, type, data) {
-  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
-}
-
 CVTensor::CVTensor(std::shared_ptr<Tensor> tensor) : Tensor(std::move(*tensor)) {
   (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
 }
+
+Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPtr *out) {
+  const CVTensorAlloc *alloc = GlobalContext::Instance()->cv_tensor_allocator();
+  *out = std::allocate_shared<CVTensor>(*alloc, shape, type);
+  int64_t byte_size = (*out)->SizeInBytes();
+  // Don't allocate if we have a tensor with no elements.
+  if (byte_size != 0) {
+    RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));
+  }
+  return (*out)->MatInit((*out)->GetMutableBuffer(), (*out)->shape_, (*out)->type_, &(*out)->mat_);
+}
+
+Status CVTensor::CreateFromMat(const cv::Mat &mat, CVTensorPtr *out) {
+  TensorPtr out_tensor;
+  cv::Mat mat_local = mat;
+  // if the input Mat's memory is not continuous, copy it to one block of memory
+  if (!mat.isContinuous()) mat_local = mat.clone();
+  TensorShape shape(mat.size, mat_local.type());
+  DataType type = DataType::FromCVType(mat_local.type());
+  RETURN_IF_NOT_OK(CreateFromMemory(shape, type, mat_local.data, &out_tensor));
+  *out = AsCVTensor(out_tensor);
+  return Status::OK();
+}
+
 std::pair<std::array<int, 2>, int> CVTensor::IsValidImage(const TensorShape &shape, const DataType &type) {
   std::array<int, 2> size = {1, 1};
   if (shape.Rank() <= 2 || (shape.Rank() == 3 && shape[2] <= CV_CN_MAX)) {
@@ -57,7 +74,8 @@ std::shared_ptr<CVTensor> CVTensor::AsCVTensor(std::shared_ptr<Tensor> t) {
   if (cv_t != nullptr) {
     return cv_t;
   } else {
-    return std::make_shared<CVTensor>(t);
+    const CVTensorAlloc *alloc = GlobalContext::Instance()->cv_tensor_allocator();
+    return std::allocate_shared<CVTensor>(*alloc, t);
   }
 }
@@ -97,5 +115,13 @@ void CVTensor::Squeeze() {
   Tensor::Squeeze();
   (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
 }
+
+Status CVTensor::MatAtIndex(const std::vector<dsize_t> &index, cv::Mat *mat) {
+  uchar *start = nullptr;
+  TensorShape remaining({-1});
+  RETURN_IF_NOT_OK(this->StartAddrOfIndex(index, &start, &remaining));
+  RETURN_IF_NOT_OK(this->MatInit(start, remaining, type_, mat));
+  return Status::OK();
+}
 }  // namespace dataset
 }  // namespace mindspore
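
A usage sketch of the new CVTensor factories defined above; the image path and input Mat are hypothetical:

    cv::Mat img = cv::imread("sample.jpg");                 // hypothetical input image
    CVTensorPtr cv_tensor;
    Status rc = CVTensor::CreateFromMat(img, &cv_tensor);   // copies the Mat's buffer into the tensor
    cv::Mat first_row;
    rc = cv_tensor->MatAtIndex({0}, &first_row);            // Mat view over the data at index 0, no copy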

View File

@@ -30,56 +30,60 @@
 namespace mindspore {
 namespace dataset {
+using CVTensorPtr = std::shared_ptr<CVTensor>;
+
 class CVTensor : public Tensor {
  public:
-  // Create an empty CVTensor of shape `shape` and type `type`.
-  // @note The shape and type information should be known and valid.
-  // @param shape TensorShape
-  // @param type DataType
-  CVTensor(const TensorShape &shape, const DataType &type);
-
-  // Create a CVTensor from a given buffer, shape and type.
-  // @note This constructor allocates a new space in the memory and copies the buffer into it.
-  // @note The buffer should be valid and the shape and type information should be known and valid.
-  // @param shape TensorShape
-  // @param type DataType
-  // @param data unsigned char*, pointer to the data.
-  CVTensor(const TensorShape &shape, const DataType &type, const uchar *data);
-
-  // Create a CVTensor from a given CV::Mat.
-  // @note This constructor allocates a new space in the memory and copies the CV::Mat buffer into it.
-  // @param mat CV::Mat
-  explicit CVTensor(const cv::Mat &mat)
-      : CVTensor(TensorShape(mat.size, mat.type()), DataType::FromCVType(mat.type()), mat.data) {}
-
-  ~CVTensor() = default;
-
-  // Static function to cast a given Tensor as CVTensor. If the input tensor is already of type CVTensor,
-  // this function would be treated as a no-op. Fot other tensor types, a new CVTensor is created based on the data
-  // provided. The Passed Tensor will be invalidated.
-  // @note there is no memory copying here, the buffer will be assigned to the constructed tensor.
-  // @param tensor
-  // @return CVTensor
-  static std::shared_ptr<CVTensor> AsCVTensor(std::shared_ptr<Tensor> tensor);
-
-  // Create a CVTensor from a given tensor. The input tensor will be invalidated (i.e., the shape and type will be
-  // set to unknown and the data buffer will point to null.
-  // @note there is no memory copying here, the buffer will be assigned to the constructed tensor.
-  // @param tensor
+  // Inherit Tensor's constructors
+  using Tensor::Tensor;
+
+  /// Create a CVTensor from a given tensor. This constructor should not be used directly, use Create* instead.
+  /// The input tensor will be invalidated (i.e., the shape and type will be
+  /// set to unknown and the data buffer will point to null.
+  /// \note there is no memory copying here, the buffer will be assigned to the constructed tensor.
+  /// \param tensor
   explicit CVTensor(std::shared_ptr<Tensor> tensor);
 
-  // Getter function for the CV::Mat
-  // @return
+  /// Create CV tensor with type and shape. Items of the tensor would be uninitialized.
+  /// \param shape [in] shape of the output tensor
+  /// \param type [in] type of the output tensor
+  /// \param out [out] Generated tensor
+  /// \return Status code
+  static Status CreateEmpty(const TensorShape &shape, DataType type, CVTensorPtr *out);
+
+  /// Create CV tensor from cv::Mat
+  /// \note This constructor allocates a new space in the memory and copies the CV::Mat buffer into it.
+  /// \param mat [in] cv::Mat to be copied into the new tensor.
+  /// \param out [out] Generated tensor
+  /// \return Status code
+  static Status CreateFromMat(const cv::Mat &mat, CVTensorPtr *out);
+
+  ~CVTensor() override = default;
+
+  /// Static function to cast a given Tensor as CVTensor. If the input tensor is already of type CVTensor,
+  /// this function would be treated as a no-op. Fot other tensor types, a new CVTensor is created based on the data
+  /// provided. The Passed Tensor will be invalidated.
+  /// \note the input tensor will be invalidated.
+  /// \note there is no memory copying here, the buffer will be assigned to the constructed tensor.
+  /// \param tensor [in]
+  /// \return CVTensor
+  static std::shared_ptr<CVTensor> AsCVTensor(std::shared_ptr<Tensor> tensor);
+
+  /// Get a reference to the CV::Mat
+  /// \return a reference to the internal CV::Mat
   cv::Mat mat() const { return mat_; }
 
-  // Static function to check if the passed information (shape and type) can be treated as a valid description
-  // of an image in OpenCV. Moreover, it returns OpenCV shape and type
-  // For example, if the shape is <512,512,3> and type is DE_UINT8, the output would be [512,512] and CV_8UC3.
-  // In case of invalid shape or type, the function will return pair<null,0>
-  // @param shape TensorShape
-  // @param type DataType
-  // @return std::pair of OpenCV shape and type
-  std::pair<std::array<int, 2>, int> IsValidImage(const TensorShape &shape, const DataType &type);
+  /// Get a copy of the CV::Mat
+  /// \return a copy of internal CV::Mat
+  cv::Mat matCopy() const { return mat_.clone(); }
+
+  /// Static function to check if the passed information (shape and type) can be treated as a valid description
+  /// of an image in OpenCV. Moreover, it returns OpenCV shape and type
+  /// For example, if the shape is <512,512,3> and type is DE_UINT8, the output would be [512,512] and CV_8UC3.
+  /// In case of invalid shape or type, the function will return pair<null,0>
+  /// \param shape [in] TensorShape
+  /// \param type [in] DataType
+  /// \return std::pair of OpenCV shape and type
+  static std::pair<std::array<int, 2>, int> IsValidImage(const TensorShape &shape, const DataType &type);
 
   Status Reshape(const TensorShape &shape) override;
@@ -87,18 +91,19 @@ class CVTensor : public Tensor {
   void Squeeze() override;
 
-  Status Mat(const std::vector<dsize_t> &index, cv::Mat *mat) {
-    uchar *start = nullptr;
-    TensorShape remaining({-1});
-    RETURN_IF_NOT_OK(this->StartAddrOfIndex(index, &start, &remaining));
-    RETURN_IF_NOT_OK(this->MatInit(start, remaining, type_, mat));
-    return Status::OK();
-  }
+  Status MatAtIndex(const std::vector<dsize_t> &index, cv::Mat *mat);
 
  private:
+  /// Opencv Mat object wrapping the raw data of the tensor.
+  /// Modifying the content of the matrix, modifies the tensor.
   cv::Mat mat_;
 
-  // Initialize CV::Mat with the data_, shape_ and type_
+  /// Create cv::Mat from data, TensorShape and DataType
+  /// \param data [in] Pointer to the data in memory.
+  /// \param shape [in] Shape of the tensor.
+  /// \param type [in] Type of the tensor.
+  /// \param mat [out] cv::Mat initialized with the provided data.
+  /// \return Status code
   Status MatInit(uchar *data, const TensorShape &shape, const DataType &type, cv::Mat *mat);
 };
 }  // namespace dataset
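
The header now distinguishes an aliasing getter from a detaching one. A sketch, assuming `cv_tensor` is a CVTensorPtr created by one of the factories above:

    cv::Mat view = cv_tensor->mat();      // wraps the tensor's buffer; writes modify the tensor
    cv::Mat copy = cv_tensor->matCopy();  // independent clone; the tensor is unaffected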

View File

@@ -284,6 +284,11 @@ inline DataType DataType::FromCType<std::string_view>() {
   return DataType(DataType::DE_STRING);
 }
 
+template <>
+inline DataType DataType::FromCType<std::string>() {
+  return DataType(DataType::DE_STRING);
+}
+
 template <>
 inline bool DataType::IsLooselyCompatible<bool>() const {
   return type_ == DataType::DE_BOOL;
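
With this specialization, the templated factories can deduce DE_STRING from std::string at compile time; a minimal sketch:

    DataType t = DataType::FromCType<std::string>();
    // t == DataType(DataType::DE_STRING), so e.g. CreateFromVector<std::string> resolves the right element type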

View File

@@ -59,49 +59,11 @@ Tensor::Tensor(const TensorShape &shape, const DataType &type) : shape_(shape),
   data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
 }
 
-Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data) : Tensor(shape, type) {
-  if (type.IsNumeric()) {
-    // If the data pointer was given, then we can also populate the tensor with data
-    if (data != nullptr) {
-      // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
-      int64_t byte_size = this->SizeInBytes();
-      Status s = this->AllocateBuffer(byte_size);  // Allocates data_ inside itself
-      if (s.IsOk() && data_ != nullptr) {
-        int ret_code = memcpy_s(data_, byte_size, data, byte_size);
-        if (ret_code != 0) {
-          MS_LOG(ERROR) << "Failed to copy data into Tensor!";
-        }
-      } else {
-        MS_LOG(ERROR) << "Failed to create memory for Tensor!";
-      }
-    }
-  } else {
-    MS_LOG(ERROR) << "Type should be numeric to use this constructor.";
-  }
-}
-
-Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data, const dsize_t &length)
-    : Tensor(shape, type) {
-  // If the data pointer was given, then we can also populate the tensor with data
-  if (data != nullptr) {
-    // Allocates data_ inside itself
-    Status s = AllocateBuffer(length);
-    if (s.IsError()) {
-      MS_LOG(ERROR) << "Failed to create memory for Tensor!";
-    }
-    if (data_ != nullptr) {
-      int ret_code = memcpy_s(data_, length, data, length);
-      if (ret_code != 0) {
-        MS_LOG(ERROR) << "Failed to copy data into Tensor!";
-      }
-    }
-  }
-}
-
 Tensor::Tensor(Tensor &&other) noexcept
     : shape_(other.shape()),
       type_(other.type()),
       data_(other.GetMutableBuffer()),
+      data_end_(other.data_end_),
       data_allocator_(std::move(other.data_allocator_)) {
   other.Invalidate();
 }
@@ -117,118 +79,61 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept {
   }
   return *this;
 }
 
-Tensor::Tensor(const std::vector<std::string> &strings, const TensorShape &shape)
-    : Tensor(TensorShape({static_cast<dsize_t>(strings.size())}), DataType(DataType::DE_STRING)) {
-  auto length_sum = [](dsize_t sum, const std::string &s) { return s.length() + sum; };
-  dsize_t total_length = std::accumulate(strings.begin(), strings.end(), 0, length_sum);
-
-  // total bytes needed = offset array + strings
-  // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
-  // strings will be null-terminated --> need 1 extra byte per element
-  dsize_t num_bytes = (kOffsetSize + 1) * shape_.NumOfElements() + kOffsetSize + total_length;
-  data_ = data_allocator_->allocate(num_bytes);
-  auto offset_arr = reinterpret_cast<offset_t *>(data_);
-  uchar *buf = GetStringsBuffer();
-
-  offset_t offset = buf - data_;  // the first string will start here
-  uint32_t i = 0;
-  for (const auto &str : strings) {
-    // insert the start index of the string.
-    offset_arr[i++] = offset;
-    // total bytes are reduced by kOffsetSize
-    num_bytes -= kOffsetSize;
-    // insert actual string
-    int ret_code = memcpy_s(data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
-    if (ret_code != 0) MS_LOG(ERROR) << "Cannot copy string into Tensor";
-    // next string will be stored right after the current one.
-    offset = offset + str.length() + 1;
-    // total bytes are reduced by the length of the string
-    num_bytes -= str.length() + 1;
-  }
-  // store one more offset value so we can get the length of the last string
-  // length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
-  offset_arr[i] = offset;
-
-  this->data_end_ = data_ + offset_arr[i];
-  MS_ASSERT(num_bytes == 0);
-  if (shape.known()) Tensor::Reshape(shape);
-}
-
-Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape)
-    : Tensor(TensorShape({static_cast<dsize_t>(bytes_list.value_size())}), DataType(DataType::DE_STRING)) {
-  // total bytes needed = offset array + strings
-  // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
-  // strings will be null-terminated --> need 1 extra byte per element
-  dsize_t num_bytes = (kOffsetSize)*shape_.NumOfElements() + kOffsetSize + bytes_list.ByteSizeLong();
-
-  data_ = data_allocator_->allocate(num_bytes);
-
-  auto offset_arr = reinterpret_cast<offset_t *>(data_);
-  uchar *buf = GetStringsBuffer();
-
-  offset_t offset = buf - data_;  // the first string will start here
-  uint32_t i = 0;
-  for (; i < bytes_list.value_size(); i++) {
-    const std::string &str = bytes_list.value(i);
-    // insert the start index of the string.
-    offset_arr[i] = offset;
-    // total bytes are reduced by kOffsetSize
-    num_bytes -= kOffsetSize;
-    // insert actual string
-    int ret_code = memcpy_s(data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
-    if (ret_code != 0) {
-      MS_LOG(ERROR) << "Cannot copy string into Tensor";
-    }
-    // next string will be stored right after the current one.
-    offset = offset + str.length() + 1;
-    // total bytes are reduced by the length of the string
-    num_bytes -= str.length() + 1;
-  }
-  // store one more offset value so we can get the length of the last string
-  // length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
-  offset_arr[i] = offset;
-  data_end_ = data_ + offset_arr[i];
-  MS_ASSERT(num_bytes == 0);
-  if (shape.known()) Tensor::Reshape(shape);
-}
-
-Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape,
-                            DataType type, const unsigned char *data) {
-  if (!shape.known()) {
-    RETURN_STATUS_UNEXPECTED("Invalid shape.");
-  }
-  if (type == DataType::DE_UNKNOWN) {
-    RETURN_STATUS_UNEXPECTED("Invalid data type.");
-  }
-
-  switch (tensor_impl) {
-    case TensorImpl::kFlexible: {
-      // The flex tensor is really just the base class tensor implementation
-      const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
-      *ptr = std::allocate_shared<Tensor>(*alloc, shape, type, data);
-      break;
-    }
-    case TensorImpl::kCv: {
-      const CVTensorAlloc *alloc = GlobalContext::Instance()->cv_tensor_allocator();
-      *ptr = std::allocate_shared<CVTensor>(*alloc, shape, type, data);
-      break;
-    }
-    default: {
-      std::string err_msg("Invalid tensor implementation type.");
-      RETURN_STATUS_UNEXPECTED(err_msg);
-    }
-  }
-  return Status::OK();  // returns base-class shared_ptr
-}
+Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out) {
+  CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape.");
+  CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type.");
+  const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
+  *out = std::allocate_shared<Tensor>(*alloc, shape, type);
+  // if it's a string tensor and it has no elements, Just initialize the shape and type.
+  if (!type.IsNumeric() && shape.NumOfElements() == 0) {
+    return Status::OK();
+  }
+
+  CHECK_FAIL_RETURN_UNEXPECTED(type.IsNumeric(), "Number of elements is not 0. The type should be numeric.");
+
+  int64_t byte_size = (*out)->SizeInBytes();
+  // Don't allocate if we have a tensor with no elements.
+  if (byte_size != 0) {
+    RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));
+  }
+  return Status::OK();
+}
+
+Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) {
+  RETURN_IF_NOT_OK(CreateEmpty(shape, type, out));
+  if (src != nullptr) {
+    // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
+    int64_t byte_size = (*out)->SizeInBytes();
+    int ret_code = memcpy_s((*out)->data_, byte_size, src, byte_size);
+    CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to copy data into tensor.");
+  }
+  return Status::OK();
+}
+
+Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const unsigned char *src,
+                                const dsize_t &length, TensorPtr *out) {
+  CHECK_FAIL_RETURN_UNEXPECTED(src != nullptr, "Pointer to source data is null.");
+  const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
+  *out = std::allocate_shared<Tensor>(*alloc, shape, type);
+  if (type.IsNumeric()) {
+    dsize_t calculated_length = (*out)->SizeInBytes();
+    CHECK_FAIL_RETURN_UNEXPECTED(calculated_length == length, "Length of source data does not match the shape.");
+  } else {
+    // min_length is the length of a tensor with empty strings
+    // min_length = the number of bytes needed to store the offsets + 1 byte for each element
+    dsize_t min_length = (shape.NumOfElements() + 1) * kOffsetSize + shape.NumOfElements();
+    CHECK_FAIL_RETURN_UNEXPECTED(min_length <= length, "Length of source data does not match the shape.");
+  }
+
+  RETURN_IF_NOT_OK((*out)->AllocateBuffer(length));
+  int ret_code = memcpy_s((*out)->data_, length, src, length);
+  CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to copy data into tensor.");
+
+  return Status::OK();
+}
 
 #ifdef ENABLE_PYTHON
-Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr) {
+Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
   std::vector<dsize_t> shape;
   for (dsize_t i = 0; i < arr.ndim(); i++) {
     shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
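
A sketch of the two numeric factories introduced in this hunk; shape and values are illustrative:

    std::shared_ptr<Tensor> t;
    Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_FLOAT32), &t);  // uninitialized 2x2
    float src[] = {1.0f, 2.0f, 3.0f, 4.0f};
    Tensor::CreateFromMemory(TensorShape({2, 2}), DataType(DataType::DE_FLOAT32),
                             reinterpret_cast<const uchar *>(src), &t);  // copies 16 bytes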
@@ -244,34 +149,38 @@ Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::arr
   arr.resize(shape);  // resize arr back to the original shape
 
-  return CreateTensor(ptr, strings, TensorShape{shape});
+  return CreateFromVector(strings, TensorShape{shape}, out);
 }
 
-Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
+Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *out) {
   if (DataType::FromNpArray(arr) == DataType::DE_STRING) {
-    return CreateTensorFromNumpyString(ptr, arr);
+    return CreateFromNpString(arr, out);
   }
   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
-  *ptr = std::allocate_shared<Tensor>(*alloc, TensorShape({}), DataType(DataType::DE_UNKNOWN));
+  *out = std::allocate_shared<Tensor>(*alloc, TensorShape::CreateScalar(), DataType(DataType::DE_UNKNOWN));
   std::vector<dsize_t> shape;
   for (dsize_t i = 0; i < arr.ndim(); i++) {
     shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
   }
-  (*ptr)->shape_ = TensorShape(shape);
-  (*ptr)->type_ = DataType::FromNpArray(arr);
-  if (!(*ptr)->shape_.known()) RETURN_STATUS_UNEXPECTED("Invalid shape.");
-  if ((*ptr)->type_ == DataType::DE_UNKNOWN) RETURN_STATUS_UNEXPECTED("Invalid data type.");
+  (*out)->shape_ = TensorShape(shape);
+  (*out)->type_ = DataType::FromNpArray(arr);
+  if (!(*out)->shape_.known()) RETURN_STATUS_UNEXPECTED("Invalid shape.");
+  if ((*out)->type_ == DataType::DE_UNKNOWN) RETURN_STATUS_UNEXPECTED("Invalid data type.");
 
   std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool();
-  (*ptr)->data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
-  int64_t byte_size = (*ptr)->SizeInBytes();
-  RETURN_IF_NOT_OK((*ptr)->AllocateBuffer(byte_size));
+  (*out)->data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
+  int64_t byte_size = (*out)->SizeInBytes();
+  if (byte_size == 0) {
+    return Status::OK();
+  }
+  RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));
 
   unsigned char *data = static_cast<unsigned char *>(arr.request().ptr);
-  if ((*ptr)->data_ == nullptr) {
+  if ((*out)->data_ == nullptr) {
     RETURN_STATUS_UNEXPECTED("Failed to create memory for Tensor.");
   }
@@ -282,61 +191,89 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
   // check if strides are contiguous
   bool is_strided = false;
-  dsize_t count = (*ptr)->shape_.NumOfElements();
+  dsize_t count = (*out)->shape_.NumOfElements();
   for (size_t i = 0; i < shape.size(); i++) {
     count /= shape[i];
-    if (strides[i] != (*ptr)->type_.SizeInBytes() * count) {
+    if (strides[i] != (*out)->type_.SizeInBytes() * count) {
       is_strided = true;
       break;
     }
   }
 
   if (is_strided) {
-    RETURN_IF_NOT_OK(CopyStridedArray((*ptr)->data_, data, shape, strides, (*ptr)->type_.SizeInBytes()));
+    RETURN_IF_NOT_OK(CopyStridedArray((*out)->data_, data, shape, strides, (*out)->type_.SizeInBytes()));
   } else {
-    int ret_code = memcpy_s((*ptr)->data_, byte_size, data, byte_size);
+    int ret_code = memcpy_s((*out)->data_, byte_size, data, byte_size);
     if (ret_code != 0) {
       RETURN_STATUS_UNEXPECTED("Failed to copy data into Tensor.");
     }
   }
 
-  return Status::OK();  // returns base-class shared_ptr
+  return Status::OK();
 }
 #endif
 
-Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
-                            const TensorShape &shape) {
+Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out) {
   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
-  *ptr = std::allocate_shared<Tensor>(*alloc, strings, shape);
+  *out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(bytes_list.value_size())}),
+                                      DataType(DataType::DE_STRING));
+  // total bytes needed = offset array + strings
+  // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
+  // strings will be null-terminated --> need 1 extra byte per element
+  dsize_t num_bytes = (kOffsetSize) * (*out)->shape_.NumOfElements() + kOffsetSize + bytes_list.ByteSizeLong();
+
+  (*out)->data_ = (*out)->data_allocator_->allocate(num_bytes);
+
+  auto offset_arr = reinterpret_cast<offset_t *>((*out)->data_);
+  uchar *buf = (*out)->GetStringsBuffer();
+
+  offset_t offset = buf - (*out)->data_;  // the first string will start here
+  uint32_t i = 0;
+  for (; i < bytes_list.value_size(); i++) {
+    const std::string &str = bytes_list.value(i);
+    // insert the start index of the string.
+    offset_arr[i] = offset;
+    // total bytes are reduced by kOffsetSize
+    num_bytes -= kOffsetSize;
+    // insert actual string
+    int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
+    if (ret_code != 0) {
+      MS_LOG(ERROR) << "Cannot copy string into Tensor";
+    }
+    // next string will be stored right after the current one.
+    offset = offset + str.length() + 1;
+    // total bytes are reduced by the length of the string
+    num_bytes -= str.length() + 1;
+  }
+  // store one more offset value so we can get the length of the last string
+  // length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
+  offset_arr[i] = offset;
+  (*out)->data_end_ = (*out)->data_ + offset_arr[i];
+  MS_ASSERT(num_bytes == 0);
+  (*out)->Reshape(shape);
   return Status::OK();
 }
 
-Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
-                            const TensorShape &shape) {
-  const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
-  *ptr = std::allocate_shared<Tensor>(*alloc, bytes_list, shape);
-  return Status::OK();
-}
-
-Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::string &file_path) {
+Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *out) {
   std::ifstream fs;
-  fs.open(file_path, std::ios::binary | std::ios::in);
-  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + file_path);
+  fs.open(path, std::ios::binary | std::ios::in);
+  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + path);
   int64_t num_bytes = fs.seekg(0, std::ios::end).tellg();
   CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Fail to find size of file");
-  RETURN_IF_NOT_OK(
-    Tensor::CreateTensor(ptr, TensorImpl::kFlexible, TensorShape{num_bytes}, DataType(DataType::DE_UINT8)));
-  int64_t written_bytes = fs.read(reinterpret_cast<char *>((*ptr)->GetMutableBuffer()), num_bytes).gcount();
+  RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out));
+  int64_t written_bytes = fs.read(reinterpret_cast<char *>((*out)->GetMutableBuffer()), num_bytes).gcount();
   CHECK_FAIL_RETURN_UNEXPECTED(written_bytes == num_bytes && fs.good(), "Error in writing to tensor");
   fs.close();
   return Status::OK();
 }
 
-Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
-                            const TensorShape &shape, const DataType &type, dsize_t pad_size) {
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(ptr, TensorImpl::kFlexible, shape, type));
+Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
+                                  const DataType &type, dsize_t pad_size, TensorPtr *out) {
+  RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, type, out));
 
-  unsigned char *current_tensor_addr = (*ptr)->GetMutableBuffer();
+  unsigned char *current_tensor_addr = (*out)->GetMutableBuffer();
   int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size;
 
   for (int i = 0; i < bytes_list.value_size(); i++) {
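
A sketch of the renamed file-reading factory; the path is hypothetical:

    std::shared_ptr<Tensor> raw;
    Status rc = Tensor::CreateFromFile("/tmp/sample.bin", &raw);
    // raw is a 1D DE_UINT8 tensor whose shape is {size of the file in bytes}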
@@ -368,7 +305,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::Byte
 // Here we convert array C to array A, by memcpy index by index (Note that not all elements in C is copied)
 Status Tensor::CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
                                 std::vector<dsize_t> strides, uint8_t type_size) {
-  dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<dsize_t>());
+  dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>());
   for (dsize_t i = 0; i < size; ++i) {
     dsize_t offset = 0;
     dsize_t count = i;
@@ -429,29 +366,29 @@ void Tensor::PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) c
   MS_ASSERT(data_);
 
   switch (type_.value()) {
-    CASE_PRINT_HEX(DataType::DE_BOOL, bool);
-    CASE_PRINT_HEX(DataType::DE_INT8, int8_t);
-    CASE_PRINT_HEX(DataType::DE_UINT8, uint8_t);
-    CASE_PRINT(DataType::DE_INT16, int16_t);
-    CASE_PRINT(DataType::DE_UINT16, uint16_t);
-    CASE_PRINT(DataType::DE_INT32, int32_t);
-    CASE_PRINT(DataType::DE_UINT32, uint32_t);
-    CASE_PRINT(DataType::DE_INT64, int64_t);
-    CASE_PRINT(DataType::DE_UINT64, uint64_t);
-    CASE_PRINT(DataType::DE_FLOAT16, float16);
-    CASE_PRINT(DataType::DE_FLOAT32, float);
-    CASE_PRINT(DataType::DE_FLOAT64, double);
+    CASE_PRINT_HEX(DataType::DE_BOOL, bool)
+    CASE_PRINT_HEX(DataType::DE_INT8, int8_t)
+    CASE_PRINT_HEX(DataType::DE_UINT8, uint8_t)
+    CASE_PRINT(DataType::DE_INT16, int16_t)
+    CASE_PRINT(DataType::DE_UINT16, uint16_t)
+    CASE_PRINT(DataType::DE_INT32, int32_t)
+    CASE_PRINT(DataType::DE_UINT32, uint32_t)
+    CASE_PRINT(DataType::DE_INT64, int64_t)
+    CASE_PRINT(DataType::DE_UINT64, uint64_t)
+    CASE_PRINT(DataType::DE_FLOAT16, float16)
+    CASE_PRINT(DataType::DE_FLOAT32, float)
+    CASE_PRINT(DataType::DE_FLOAT64, double)
 
     case DataType::DE_STRING: {
       std::string_view o{""};
@@ -501,50 +438,14 @@ void Tensor::Print(std::ostream &out) const {
   }
 }
 
 Status Tensor::AllocateBuffer(const dsize_t &length) {
+  RETURN_UNEXPECTED_IF_NULL(data_allocator_);
   if (data_ == nullptr) {
-    if (data_allocator_ != nullptr) {
-      data_ = data_allocator_->allocate(length);
-      RETURN_UNEXPECTED_IF_NULL(data_);
-      data_end_ = data_ + length;
-    } else {
-      data_ = static_cast<unsigned char *>(malloc(length));
-      data_end_ = data_ + length;
-      RETURN_UNEXPECTED_IF_NULL(data_);
-    }
+    data_ = data_allocator_->allocate(length);
+    CHECK_FAIL_RETURN_UNEXPECTED(data_ != nullptr, "Failed to allocate memory for tensor.");
+    data_end_ = data_ + length;
   }
   return Status::OK();
 }
-
-const unsigned char *Tensor::GetBuffer() const {
-  // This version cannot modify anything. data_ could possibly be null.
-  return data_;
-}
-
-// check for empty
-bool Tensor::HasData() const {
-  if (data_ == nullptr) {
-    return true;
-  } else {
-    return false;
-  }
-}
-
-unsigned char *Tensor::GetMutableBuffer() {
-  if (!shape_.known() || type_ == DataType::DE_UNKNOWN) {
-    return nullptr;
-  }
-  // If the data area is already created, return the pointer to it
-  if (data_ != nullptr) {
-    return data_;
-  } else {
-    // If the data area is not created, then identify the memory size based
-    // on the shape and type and allocate it.
-    if (this->AllocateBuffer(this->SizeInBytes()).IsOk()) {
-      return data_;
-    } else {
-      return nullptr;
-    }
-  }
-}
 
 Status Tensor::Reshape(const TensorShape &shape) {
   if (shape.NumOfElements() == shape_.NumOfElements()) {
@@ -628,7 +529,7 @@ Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_p
   err_msg += (ind.size() + tensor->Rank() != this->Rank()) ? "[Tensor] incorrect index\n" : "";
   err_msg += tensor->type().SizeInBytes() != this->type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : "";
   uchar *start_addr_of_ind = nullptr;
-  TensorShape remaining_shape({-1});
+  TensorShape remaining_shape = TensorShape::CreateUnknownRankShape();
   err_msg += (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : "";
   err_msg += !(remaining_shape == tensor->shape()) ? "[Tensor] memory error\n" : "";
   if (!err_msg.empty()) {
@@ -697,7 +598,7 @@ Status Tensor::ExpandDim(const dsize_t &axis) {
   return Status::OK();
 }
 
-std::vector<dsize_t> Tensor::Strides() {
+std::vector<dsize_t> Tensor::Strides() const {
   std::vector<dsize_t> strides = shape_.Strides();
   uint8_t size = type_.SizeInBytes();
   std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
@@ -765,7 +666,6 @@ Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index)
 #ifdef ENABLE_PYTHON
 // return data as numpy, should return status
 Status Tensor::GetDataAsNumpy(py::array *data) {
-  RETURN_UNEXPECTED_IF_NULL(data_);
   RETURN_UNEXPECTED_IF_NULL(data);
   if (type_ == DataType::DE_BOOL) {
     *data = py::array_t<bool>(shape_.AsVector(), reinterpret_cast<bool *>(data_));
@@ -974,7 +874,9 @@ Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vect
 }
 
 Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices) {
   CHECK_FAIL_RETURN_UNEXPECTED(shape_.Rank() == 1, "Currently Slice work with rank 1 tensors only.");
-  CHECK_FAIL_RETURN_UNEXPECTED(!indices.empty(), "Indices are empty, generated tensor would be empty.");
+  if (indices.empty()) {
+    return CreateEmpty(TensorShape({0}), type_, out);
+  }
   if (type_.IsNumeric()) {
     return SliceNumeric(out, indices);
   } else {
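
With this change, slicing with an empty index list yields an empty tensor instead of an error. A sketch, where `t` is a rank-1 tensor:

    std::shared_ptr<Tensor> out;
    Status rc = t->Slice(&out, {});  // previously failed with "Indices are empty..."
    // out->shape() == TensorShape({0})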
@@ -982,8 +884,7 @@ Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &i
   }
 }
 
 Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices) {
-  RETURN_IF_NOT_OK(
-    CreateTensor(out, TensorImpl::kFlexible, TensorShape({static_cast<dsize_t>(indices.size())}), type_));
+  RETURN_IF_NOT_OK(CreateEmpty(TensorShape({static_cast<dsize_t>(indices.size())}), type_, out));
   (*out)->GetMutableBuffer();
   dsize_t out_index = 0;
   dsize_t dim_length = shape_[0];
@@ -1027,7 +928,7 @@ Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<dsize
     GetItemAt(&sv, {cur_index});
     strings.emplace_back(sv);
   }
-  return CreateTensor(out, strings);
+  return CreateFromVector(strings, TensorShape({static_cast<dsize_t>(strings.size())}), out);
 }
 }  // namespace dataset

View File

@@ -33,6 +33,7 @@
 #include "pybind11/stl.h"
 #endif
 
+#include "common/utils.h"
 #include "minddata/dataset/core/constants.h"
 #include "minddata/dataset/core/data_type.h"
 #include "minddata/dataset/core/tensor_shape.h"
@@ -50,170 +51,155 @@ class Allocator;
 using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>;
 using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>;  // An allocator shared_ptr for Tensors
+using offset_t = uint32_t;                                  // type of offset values to store strings locations
+using TensorPtr = std::shared_ptr<Tensor>;
 
 class Tensor {
  public:
   Tensor() = delete;
-
-  // Create a new tensor, does not internally allocate storage. This constructor is protected, use CreateTensor.
-  // @note The shape and type information should be known and valid.
-  // @param shape TensorShape
-  // @param type DataType
-  Tensor(const TensorShape &shape, const DataType &type);
-
-  // Create a new tensor, allocates storage and copies in data. This constructor is protected, use CreateTensor.
-  // @note The buffer should be valid and the shape and type information should be known and valid.
-  // @param shape TensorShape
-  // @param type DataType
-  // @param data unsigned char*, pointer to the data.
-  Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data);
-
-  Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data, const dsize_t &length);
-
   Tensor(const Tensor &other) = delete;
   Tensor &operator=(const Tensor &other) = delete;
 
+  /// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead
+  /// \note The shape and type information should be known and valid
+  /// \note The constructor does not allocate data
+  /// \param shape TensorShape
+  /// \param type DataType
+  Tensor(const TensorShape &shape, const DataType &type);
+
+  /// Move constructor
+  /// \param other Tensor to be moved
   Tensor(Tensor &&other) noexcept;
+
+  /// Move assigment operator
+  /// \param other Tensor to be moved
   Tensor &operator=(Tensor &&other) noexcept;
 
-  Status AllocateBuffer(const dsize_t &length);
-
-  // type of offest values to store strings information
-  using offset_t = uint32_t;
-  // const of the size of the offset variable
-  static constexpr uint8_t kOffsetSize = sizeof(offset_t);
-  // Tensor base class which holds the data in an unsigned char* buffer.
-
-  // Construct a scalar string Tensor
-  explicit Tensor(const std::string &str) : Tensor(std::vector<std::string>{str}, TensorShape::CreateScalar()) {}
-
-  // Construct a tensor from a list of strings. Reshape the tensor with `shape` if given, otherwise assume the shape is
-  // the size of the vector `strings`.
-  // The memory layout of a Tensor of strings consists of the Offset_array followed by the strings.
-  // Thr offset array will store one extra value to find the length of the last string.
-  // OFFSET1, OFFSET2, ..., OFFSETn+1, STRING1, STRING2, ..., STRINGn
-  // The value of each offset is the start index of the corresponding string
-  // Offsets is of type offest_t
-  // strings will ne null-terminated
-  // example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
-  // |----------------------------------------------------------------|
-  // |             OFFSET ARRAY          |           STRINGS          |
-  // | bytes 0-3 | bytes 3-6 | bytes 7-10 | bytes 11-14 | bytes 15-17 |
-  // |    11     |    15     |     18     |    abc\0    |     de\0    |
-  // |----------------------------------------------------------------|
-  explicit Tensor(const std::vector<std::string> &strings,
-                  const TensorShape &shape = TensorShape::CreateUnknownRankShape());
-
-  // Same as Tensor(vector<string>) but the input is protobuf bytelist
-  explicit Tensor(const dataengine::BytesList &bytes_list,
-                  const TensorShape &shape = TensorShape::CreateUnknownRankShape());
-
-  // A static factory method to create the given flavour of derived Tensor
-  // Returns the base class reference for the Tensor.
-  // @param ptr output argument to hold the created Tensor of given tensor_impl
-  // @param tensor_impl - which implementation of Tensor
-  // @param shape - shape of the tensor
-  // @param type - datatype of the tensor
-  // @param data - data to be copied to Tensor new allocation
-  // @return Status Code
-  static Status CreateTensor(std::shared_ptr<Tensor> *, TensorImpl tensor_impl, const TensorShape &shape, DataType type,
-                             const unsigned char *data = nullptr);
-
-  // Create a copy of the input tensor
-  // @param out [out] output tensor to be generated
-  // @param in [in] orginal tensor to be copied
-  // @return Status
-  static Status CreateTensor(std::shared_ptr<Tensor> *out, const std::shared_ptr<Tensor> &in) {
-    const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
-    *out = std::allocate_shared<Tensor>(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes());
-    return Status::OK();
+  /// Create a numeric tensor with type and shape. Items of the tensor would be uninitialized.
+  /// \param[in] shape shape of the output tensor
+  /// \param[in] type type of the output tensor
+  /// \param[out] out Generated tensor
+  /// \return Status code
+  static Status CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out);
+
+  /// Create a numeric tensor from a pointer in memory. Length of the source data is determined from the shape and type.
+  /// Data will be copied into the new created tensor.
+  /// \param[in] shape shape of the output tensor
+  /// \param[in] type type of the output tensor
+  /// \param[in] src pointer to the source data
+  /// \param[out] out Generated tensor
+  /// \return Status code
+  static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out);
+
+  /// Create a tensor from a pointer in memory and length. Data will be copied into the new created tensor.
+  /// \param[in] shape shape of the output tensor
+  /// \param[in] type type of the output tensor
+  /// \param[in] src pointer to the source data
+  /// \param[in] length length of the src data
+  /// \param[out] out Generated tensor
+  /// \return Status code
+  static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src,
+                                 const dsize_t &length, TensorPtr *out);
+
+  /// Create a copy of the input tensor
+  /// \param[in] in original tensor to be copied
+  /// \param[out] out output tensor to be generated
+  /// \return Status
+  static Status CreateFromTensor(const TensorPtr &in, TensorPtr *out) {
+    return CreateFromMemory(in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes(), out);
   }
 
 #ifdef ENABLE_PYTHON
-  // A static factory method to create a Tensor from a given py::array.
-  // @param ptr output argument to hold the created Tensor
-  // @param arr py::array
-  // @return Status Code
-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr);
-
-  // Helper function to create a tensor from Numpy of strings
-  static Status CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr);
+  /// Create a Tensor from a given py::array
+  /// \param[in] arr py::array
+  /// \param[out] out Created tensor
+  /// \return Status Code
+  static Status CreateFromNpArray(const py::array &arr, TensorPtr *out);
 #endif
 
-  // A static factory method to create a Tensor from a given list of strings.
-  // @param ptr output argument to hold the created Tensor
-  // @param strings elements of the tensor
-  // @param shape shape of the tensor
-  // @return Status Code
-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
-                             const TensorShape &shape = TensorShape::CreateUnknownRankShape());
-
-  // create tensor from protobuf bytelist with strings
-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
-                             const TensorShape &shape);
-
-  // A static factory method to create a Tensor from a given list of numbers.
-  // @param ptr output argument to hold the created Tensor
-  // @param items elements of the tensor
-  // @param shape shape of the tensor
-  // @return Status Code
+  /// Create a tensor of type DE_STRING from a BytesList.
+  /// \param[in] bytes_list protobuf's Bytelist
+  /// \param[in] shape shape of the outout tensor
+  /// \param[out] out created Tensor
+  /// \return Status Code
+  static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out);
+
+  /// Create a tensor of type UINT8 or INT8 from a BytesList.
+  /// The tensor will be padded with ' ' to reach the required pad_size.
+  /// \param[in] bytes_list protobuf's Bytelist
+  /// \param[in] shape shape of the output tensor
+  /// \param[in] type type of created tensor. Should be DE_UINT8 or INT8
+  /// \param[in] pad_size The size of the tensor after padding
+  /// \param[out] out created Tensor
+  /// \return Status Code
+  static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
+                                   const DataType &type, dsize_t pad_size, TensorPtr *out);
+
+  /// Create a Tensor from a given list of values.
+  /// \tparam type of the values to be inserted.
+  /// \param[in] items elements of the tensor
+  /// \param[in] shape shape of the output tensor
+  /// \param[out] out output argument to hold the created Tensor
+  /// \return Status Code
   template <typename T>
-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<T> &items,
-                             const TensorShape &shape_req = TensorShape::CreateUnknownRankShape()) {
+  static Status CreateFromVector(const std::vector<T> &items, const TensorShape &shape, TensorPtr *out) {
+    CHECK_FAIL_RETURN_UNEXPECTED(
+      items.size() == shape.NumOfElements(),
+      "Number of elements in the vector does not match the number of elements of the shape required");
     DataType type = DataType::FromCType<T>();
+    // if items is empty, items_ptr would be nullptr. CreateFromMemory will handle this case.
     auto items_ptr = reinterpret_cast<const uchar *>(&items[0]);
-    TensorShape shape = shape_req;
-    if (!shape.known()) {
-      shape = TensorShape({static_cast<dsize_t>(items.size())});
-    }
-    return CreateTensor(ptr, TensorImpl::kFlexible, shape, type, items_ptr);
+    return CreateFromMemory(shape, type, items_ptr, out);
   }
 
-  // A static factory method to create a Tensor from a given number.
-  // @param ptr output argument to hold the created Tensor
-  // @param item value
-  // @return Status Code
+  /// Create a 1D Tensor from a given list of values.
+  /// \tparam type of the values to be inserted.
+  /// \param[in] items elements of the tensor
+  /// \param[out] out output argument to hold the created Tensor
+  /// \return Status Code
   template <typename T>
-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const T &item) {
-    return CreateTensor<T>(ptr, {item}, TensorShape::CreateScalar());
+  static Status CreateFromVector(const std::vector<T> &items, TensorPtr *out) {
+    return CreateFromVector(items, TensorShape({static_cast<dsize_t>(items.size())}), out);
   }
 
-  // Create tensor from protobuf bytelist with uint8 or int8 types
-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
-                             const TensorShape &shape, const DataType &type, dsize_t pad_size);
-
-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::string &path);
+  /// Create a numeric scalar Tensor from the given value.
+  /// \tparam T type of value
+  /// \param[in] item value
+  /// \param[out] out Created tensor
+  /// \return Status code
+  template <typename T>
+  static Status CreateScalar(const T &item, TensorPtr *out) {
+    DataType type = DataType::FromCType<T>();
+    auto item_ptr = reinterpret_cast<const uchar *>(&item);
+    return CreateFromMemory(TensorShape::CreateScalar(), type, item_ptr, out);
+  }
+
+  /// Create a tensor from a binary file on disk.
+  /// \param[in] path file to be read
+  /// \param[out] out Created Tensor
+  /// \return Status code
+  static Status CreateFromFile(const std::string &path, TensorPtr *out);
 
-  // Copy raw data of a array based on shape and strides to the destination pointer
-  // @param dst Pointer to the destination array where the content is to be copied
-  // @param src Pointer to the source of strided array to be copied
-  // @param shape - shape of the source array
-  // @param strides - strides of the source array
-  // @param type_size - number of bytes needed to store one array element's type
-  // @return Status Code
-  static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
-                                 std::vector<dsize_t> strides, uint8_t type_size);
-
-  // Release the memory using the allocator
+  /// Destruct the tensor and release the memory using the allocator
   virtual ~Tensor();
 
-  // compare the tensor shape and data
+  /// Equality operator. compares tensor shape, type and data
+  /// \param[in] rhs Tensor to be compared with
+  /// \return bool
   bool operator==(const Tensor &rhs) const;
 
   bool operator!=(const Tensor &rhs) const { return !((*this) == rhs); }
 
-  // Get item located at `index`, caller needs to provide the type.
-  // @tparam T
-  // @param index vector<dsize_t>
-  // @return return the item specified at index
+  /// Get item located at `index`, caller needs to provide the type.
+  /// \tparam T
+  /// \param[in] index vector<dsize_t>
+  /// \return return the item specified at index
   template <typename T>
   Status GetItemAt(T *o, const std::vector<dsize_t> &index) const;
 
-  // Get string located at `index`.
-  // @param index vector<dsize_t>
-  // @return return std::string_view specified at index
+  /// Get string located at `index`.
+  /// \param[in] index vector<dsize_t>
+  /// \return return std::string_view specified at index
   Status GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const;
 
   template <typename T>
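
A sketch of the vector and scalar factories declared above; values are illustrative:

    std::shared_ptr<Tensor> t;
    Tensor::CreateFromVector(std::vector<float>{1.0f, 2.0f, 3.0f}, &t);                     // 1D tensor, shape {3}
    Tensor::CreateFromVector(std::vector<float>{1, 2, 3, 4}, TensorShape({2, 2}), &t);      // size must match shape.NumOfElements()
    Tensor::CreateScalar<float>(1.5f, &t);                                                  // rank-0 scalar tensor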
@@ -225,22 +211,21 @@ class Tensor {
   template <typename T>
   Status GetFloatAt(T *o, const std::vector<dsize_t> &index) const;
 
-  // set item at location specified by index
-  // @tparam `T`
-  // @param index
-  // @param value of type `T`
+  /// set item at location specified by index
+  /// \tparam `T`
+  /// \param[in] index
+  /// \param[in] value of type `T`
   template <typename T>
   Status SetItemAt(const std::vector<dsize_t> &index, const T &value) {
-    RETURN_IF_NOT_OK(AllocateBuffer(SizeInBytes()));
     T *ptr = nullptr;
     RETURN_IF_NOT_OK(GetItemPtr<T>(&ptr, index));
     *ptr = value;
     return Status::OK();
   }
 
-  // set string item at location specified by index
-  // @param index
-  // @param value of type std::string
+  /// set string item at location specified by index
+  /// \param[in] index
+  /// \param[in] value of type std::string
   Status SetItemAt(const std::vector<dsize_t> &index, const std::string &value) {
     RETURN_UNEXPECTED_IF_NULL(data_);
     uchar *ptr = nullptr;
@@ -253,7 +238,8 @@ class Tensor {
     return Status::OK();
   }
 
-  // fill tensor with Zeros. Does not support strings.
+
+  /// fill tensor with Zeros. Does not support strings.
   Status Zero() {
     CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use Zero on tensor of strings..");
     dsize_t size = SizeInBytes();
@@ -262,13 +248,12 @@ class Tensor {
     return Status::OK();
   }
 
-  // Fill all elements in the Tensor with the given value of type `T`. Does not support strings.
-  // @tparam T
-  // @param value
+  /// Fill all elements in the Tensor with the given value of type `T`. Does not support strings.
+  /// \tparam T
+  /// \param value[in]
   template <typename T>
   Status Fill(const T &value) {
     CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use fill on tensor of strings.");
-    RETURN_IF_NOT_OK(AllocateBuffer(SizeInBytes()));
     int64_t cellSize = type_.SizeInBytes();
     if ((data_ != nullptr) && type_.IsCompatible<T>()) {
       for (dsize_t i = 0; i < Size(); i++) {
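
Since SetItemAt and Fill no longer allocate, the buffer must already exist before items are written; a sketch:

    std::shared_ptr<Tensor> t;
    Tensor::CreateEmpty(TensorShape({2}), DataType(DataType::DE_INT32), &t);  // allocates the buffer
    t->SetItemAt<int32_t>({0}, 42);
    t->Fill<int32_t>(7);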
@ -283,91 +268,86 @@ class Tensor {
} }
} }
// Getter function for shape /// Getter function for shape
// @return /// \return
const TensorShape &shape() const { return shape_; } const TensorShape &shape() const { return shape_; }
/// Check if tensor has data /// Check if tensor has data
/// \return bool - true if the tensor has data /// \return bool - true if the tensor has data
bool HasData() const; bool HasData() const { return data_ != nullptr; }
// Reshape the tensor. The given shape should have the same number of elements as the Tensor /// Reshape the tensor. The given shape should have the same number of elements as the Tensor
// @param shape /// \param shape
virtual Status Reshape(const TensorShape &shape); virtual Status Reshape(const TensorShape &shape);
// @return number of elements in this tensor /// \return number of elements in this tensor
dsize_t Size() const { return shape().NumOfElements(); } dsize_t Size() const { return shape().NumOfElements(); }
// @return the number of bytes this tensor needs /// \return the number of bytes this tensor needs
dsize_t SizeInBytes() const { dsize_t SizeInBytes() const {
if (data_end_ == nullptr) return type_.SizeInBytes() * shape_.NumOfElements(); if (data_end_ == nullptr) return type_.SizeInBytes() * shape_.NumOfElements();
return data_end_ - data_; return data_end_ - data_;
} }
// @return the rank of the tensor /// \return the rank of the tensor
dsize_t Rank() const { return shape().Rank(); } dsize_t Rank() const { return shape().Rank(); }
// Get the starting memory address as a constant for the data of the tensor. This potentially /// Get the starting memory address as a constant for the data of the tensor. This potentially
// drives an allocation if the data area is null. /// drives an allocation if the data area is null.
// @return const unsigned char* /// \return const unsigned char*
const unsigned char *GetBuffer() const; const unsigned char *GetBuffer() const { return data_; }
// Skips the offsets and returns the start of the buffer where the real strings are stored. Caller needs to check if the /// Getter of the type
// tensor's type is a string; otherwise an undefined address would be returned. /// \return
// @return address of the first string of the tensor.
uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; }
// Getter of the type
// @return
DataType type() const { return type_; } DataType type() const { return type_; }
// Provide stream operator for displaying it /// Provide stream operator for displaying it
// @param out output stream /// \param out output stream
// @param so the Tensor object to be printed /// \param so the Tensor object to be printed
// @return output stream /// \return output stream
friend std::ostream &operator<<(std::ostream &out, const Tensor &so) { friend std::ostream &operator<<(std::ostream &out, const Tensor &so) {
so.Print(out); so.Print(out);
return out; return out;
} }
// Invalidate this Tensor by setting the type and shape to unknown and MData to null. /// Invalidate this Tensor by setting the type and shape to unknown and MData to null.
// Calling this method will make the Tensor and its data inaccessible, use it with caution. /// Calling this method will make the Tensor and its data inaccessible, use it with caution.
void Invalidate(); void Invalidate();
// Copy input tensor into self at the location index. /// Copy input tensor into self at the location index.
// Index is a vector of axes which can be incomplete: /// Index is a vector of axes which can be incomplete:
// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell. /// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell.
// @param index /// \param index
// @param input /// \param input
// @return Status code /// \return Status code
Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input); Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input);
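The incomplete-index rule above, sketched with the single-argument CreateFromVector overload used elsewhere in this diff (illustrative, not from the patch):
  std::shared_ptr<Tensor> dst, row;
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({2, 3}), DataType(DataType::DE_INT32), &dst));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(std::vector<int32_t>{1, 2, 3}, &row));
  // index {0} has rank 1 < tensor rank 2, so `row` replaces the whole first row of `dst`.
  RETURN_IF_NOT_OK(dst->InsertTensor({0}, row));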
// Find the address of the given index. Used in InsertTensor. /// Find the address of the given index. Used in InsertTensor.
// Example: /// Example:
// Tensor t= [[1,2],[3,4]] , StartAddrOfIndex({0}) -> &1 /// Tensor t= [[1,2],[3,4]] , StartAddrOfIndex({0}) -> &1
// @param index incomplete index /// \param index incomplete index
// @param output: startAddrofIndex /// \param[out] start_addr_of_index
// @param output: remaining /// \param[out] remaining
// @return Status code /// \return Status code
Status StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining); Status StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining);
// Expand the shape of the Tensor with one extra dimension. /// Expand the shape of the Tensor with one extra dimension.
// For example, if the shape is <512,512,3>: /// For example, if the shape is <512,512,3>:
// *- ExpandDim(0) gives: <1,512,512,3> /// *- ExpandDim(0) gives: <1,512,512,3>
// *- ExpandDim(1) gives: <512,1,512,3> /// *- ExpandDim(1) gives: <512,1,512,3>
// *- ExpandDim(3) gives: <512,512,3,1> /// *- ExpandDim(3) gives: <512,512,3,1>
// @param axis location of the dim /// \param axis location of the dim
virtual Status ExpandDim(const dsize_t &axis); virtual Status ExpandDim(const dsize_t &axis);
virtual void Squeeze(); virtual void Squeeze();
// Calculates the strides of the Tensor /// Calculates the strides of the Tensor
// Ex: Tensor of shape <4,3,2> and type DE_UINT8 (1 byte) /// Ex: Tensor of shape <4,3,2> and type DE_UINT8 (1 byte)
// The strides will be {6,2,1}. /// The strides will be {6,2,1}.
// Ex: Tensor of shape <4,3,2> and type DE_UINT32 (4 bytes) /// Ex: Tensor of shape <4,3,2> and type DE_UINT32 (4 bytes)
// The strides will be {24,8,4}. /// The strides will be {24,8,4}.
// @return vector of integers /// \return vector of integers
std::vector<dsize_t> Strides(); std::vector<dsize_t> Strides() const;
std::string ToString() { std::string ToString() {
std::stringstream ss; std::stringstream ss;
@ -375,26 +355,26 @@ class Tensor {
return ss.str(); return ss.str();
} }
// Handle negative indices. /// Handle negative indices.
static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; } static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; }
// Slice tensor based on the given indices. Copy the sliced data into the out tensor. Only rank-1 tensors are supported. /// Slice tensor based on the given indices. Copy the sliced data into the out tensor. Only rank-1 tensors are supported.
// Based on the type of tensor, SliceNumeric or SliceString will be called /// Based on the type of tensor, SliceNumeric or SliceString will be called
// @param out Tensor /// \param[out] out Tensor
// @param indices vector of indices /// \param[in] indices vector of indices
// @return Status error code /// \return Status error code
Status Slice(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices); Status Slice(TensorPtr *out, const std::vector<dsize_t> &indices);
// Slice numeric tensors. /// Slice numeric tensors.
Status SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices); Status SliceNumeric(TensorPtr *out, const std::vector<dsize_t> &indices);
// Slice string tensors /// Slice string tensors
Status SliceString(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices); Status SliceString(TensorPtr *out, const std::vector<dsize_t> &indices);
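A sketch of the slicing API above on a rank-1 numeric tensor (illustrative only; values assumed):
  std::shared_ptr<Tensor> in, out;
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(std::vector<int32_t>{10, 20, 30, 40}, &in));
  // Numeric input dispatches to SliceNumeric; string input would go to SliceString.
  RETURN_IF_NOT_OK(in->Slice(&out, {0, 2}));  // out = [10, 30]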
#ifdef ENABLE_PYTHON #ifdef ENABLE_PYTHON
// Constructs numpy array from input tensor /// Constructs numpy array from input tensor
// @param data the numpy array that receives the tensor data /// \param[out] data the numpy array that receives the tensor data
// @return Status code /// \return Status code
Status GetDataAsNumpy(py::array *data); Status GetDataAsNumpy(py::array *data);
Status GetDataAsNumpyStrings(py::array *data); Status GetDataAsNumpyStrings(py::array *data);
@ -402,12 +382,12 @@ class Tensor {
static Status GetBufferInfo(Tensor *t, py::buffer_info *out); static Status GetBufferInfo(Tensor *t, py::buffer_info *out);
#endif #endif
// Concatenate the given tensor into this one at the given index; unlike InsertTensor, the input can be smaller than this tensor /// Concatenate the given tensor into this one at the given index; unlike InsertTensor, the input can be smaller than this tensor
Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input); Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input);
// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor /// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor
// The order of elements follows the memory layout (i.e., row-major): [[1,2,3],[4,5,6]] --> 1,2,3,4,5,6 /// The order of elements follows the memory layout (i.e., row-major): [[1,2,3],[4,5,6]] --> 1,2,3,4,5,6
// @tparam T type of values in the Tensor Iterator /// \tparam T type of values in the Tensor Iterator
template <typename T, bool = true> template <typename T, bool = true>
class TensorIterator { class TensorIterator {
public: public:
@ -498,7 +478,7 @@ class Tensor {
}; };
// Specialization of TensorIterator for strings. It returns std::string_view for every item. // Specialization of TensorIterator for strings. It returns std::string_view for every item.
// @tparam DUMMY, used to be able to specialize the inner class // \tparam DUMMY, used to be able to specialize the inner class
template <bool DUMMY> template <bool DUMMY>
class TensorIterator<std::string_view, DUMMY> { class TensorIterator<std::string_view, DUMMY> {
public: public:
@ -585,84 +565,192 @@ class Tensor {
const char *data_; const char *data_;
}; };
// Return a TensorIterator that points to the start of the Tensor. /// Return a TensorIterator that points to the start of the Tensor.
// It is the user's responsibility to use the correct type that matches the Tensor type /// It is the user's responsibility to use the correct type that matches the Tensor type
// @param T The type of values in the Tensor /// \tparam T The type of values in the Tensor
// @return TensorIterator /// \return TensorIterator
template <typename T> template <typename T>
TensorIterator<T> begin() { TensorIterator<T> begin() {
AllocateBuffer(SizeInBytes());
return TensorIterator<T>(data_); return TensorIterator<T>(data_);
} }
// Return a linear iterator that points to the place after the last element of the Tensor. /// Return a linear iterator that points to the place after the last element of the Tensor.
// @tparam T The type of values in the Tensor /// \tparam T The type of values in the Tensor
// @return TensorIterator /// \return TensorIterator
template <typename T> template <typename T>
TensorIterator<T> end() { TensorIterator<T> end() {
return TensorIterator<T>(data_end_); return TensorIterator<T>(data_end_);
} }
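A sketch of the iterator pair above; the element type given to begin()/end() must match the tensor's DataType (illustrative only):
  std::shared_ptr<Tensor> t;
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(std::vector<uint32_t>{1, 2, 3, 4}, &t));
  uint32_t sum = 0;
  for (auto it = t->begin<uint32_t>(); it != t->end<uint32_t>(); ++it) sum += *it;  // row-major walk
  // String tensors iterate via the std::string_view specialization instead.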
// Copies the last dimension at `index` from Tensor `src` to this Tensor. /// Copies the last dimension at `index` from Tensor `src` to this Tensor.
// @param src Tensor /// \param[in] src Tensor
// @param index vector to the start of the dimension. The last dim should be 0 /// \param[in] index vector to the start of the dimension. The last dim should be 0
// @return Status /// \return Status
Status CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index); Status CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index);
protected: protected:
// Get the starting memory address for the data of the tensor. This potentially /// Allocate memory for the tensor using the data_allocator
// drives an allocation if the data is null. /// \param[in] length number of bytes to be allocated
// @return unsigned char* /// \return Error Status
unsigned char *GetMutableBuffer(); Status AllocateBuffer(const dsize_t &length);
// A function that prints Tensor recursively, first called by print /// Get the starting memory address for the data of the tensor. This potentially
// @param out /// drives an allocation if the data is null.
// @param cur_dim /// \return unsigned char*
// @param cur_index unsigned char *GetMutableBuffer() { return data_; }
/// A function that prints Tensor recursively, first called by print
/// \param[in] out
/// \param[in] cur_dim
/// \param[in] cur_index
void PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const; void PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const;
// A function that prints info about the tensor /// A function that prints info about the tensor
// @param out output stream /// \param[out] out output stream
void Print(std::ostream &out) const; void Print(std::ostream &out) const;
// A function that prints the value as specified by its index /// A function that prints the value as specified by its index
// @param index vector representing the index /// \param[in] index vector representing the index
// @param out /// \param[out] out
void PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const; void PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const;
// Get pointer to item located at `index`, caller needs to provide the type. /// Get pointer to item located at `index`, caller needs to provide the type.
// @tparam T /// \tparam T
// @param index vector<dsize_t> /// \param[in] index vector<dsize_t>
// @return return a pointer to the item specified at index of type `T` /// \return return a pointer to the item specified at index of type `T`
template <typename T> template <typename T>
Status GetItemPtr(T **, const std::vector<dsize_t> &index) const; Status GetItemPtr(T **, const std::vector<dsize_t> &index) const;
// Get pointer to string located at `index` and the length of string /// Get pointer to string located at `index` and the length of string
// @param index vector<dsize_t> /// \param[in] index vector<dsize_t>
// @return return a pointer to the string specified at index and the length of the string /// \return return a pointer to the string specified at index and the length of the string
Status GetItemPtr(uchar **, const std::vector<dsize_t> &index, offset_t *length = nullptr) const; Status GetItemPtr(uchar **, const std::vector<dsize_t> &index, offset_t *length = nullptr) const;
// Given a flat index of an item string, return the start and length of the item /// Given a flat index of an item string, return the start and length of the item
// @param index flat index of the item /// \param[in] index flat index of the item
// @return start address of the string /// \param[out] string_start start address of the string
// @return length of the string /// \param[out] length length of the string
Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const; Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const;
// all access to shape_ should be via shape /// Skips the offsets and returns the start of the buffer where the real strings are stored. Caller needs to check
/// that the tensor's type is a string; otherwise an undefined address would be returned.
/// \return address of the first string of the tensor.
uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; }
/// all access to shape_ should be via shape
TensorShape shape_; TensorShape shape_;
// data type of tensor /// data type of tensor
DataType type_; DataType type_;
// pointer to the start of the physical data /// pointer to the start of the physical data
unsigned char *data_; unsigned char *data_;
// An allocator for data_ /// An allocator for data_
CharAllocPtr data_allocator_; CharAllocPtr data_allocator_;
// pointer to the end of the physical data /// pointer to the end of the physical data
unsigned char *data_end_ = nullptr; unsigned char *data_end_ = nullptr;
private:
/// Helper function to create a tensor from Numpy array of strings
/// \param[in] arr Numpy array
/// \param[out] out Created Tensor
/// \return Status
static Status CreateFromNpString(py::array arr, TensorPtr *out);
/// Copy raw data of an array based on shape and strides to the destination pointer
/// \param[out] dst Pointer to the destination array where the content is to be copied
/// \param[in] src Pointer to the source of strided array to be copied
/// \param[in] shape shape of the source array
/// \param[in] strides strides of the source array
/// \param[in] type_size number of bytes needed to store one array element's type
/// \return Status Code
static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
std::vector<dsize_t> strides, uint8_t type_size);
/// constant for the size of the offset type
static constexpr uint8_t kOffsetSize = sizeof(offset_t);
}; };
template <> template <>
inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() { inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() {
return TensorIterator<std::string_view>(data_, shape_.NumOfElements()); return TensorIterator<std::string_view>(data_, shape_.NumOfElements());
} }
/// Create a Tensor from a given list of strings.
/// \note The memory layout of a Tensor of strings consists of the offset array followed by the strings.
/// The offset array stores one extra value to find the length of the last string.
/// OFFSET_1, OFFSET_2, ..., OFFSET_n+1, STRING_1, STRING_2, ..., STRING_n
/// The value of each offset is the start index of the corresponding string.
/// Offsets are of type offset_t.
/// Strings will be null-terminated.
/// Example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
/// |------------------------------------------------------------------|
/// |           OFFSET ARRAY              |           STRINGS          |
/// | bytes 0-3 | bytes 4-7 | bytes 8-11  | bytes 12-15 | bytes 16-18  |
/// |    12     |    16     |     19      |    abc\0    |    de\0      |
/// |------------------------------------------------------------------|
/// \param[in] items elements of the tensor
/// \param[in] shape shape of the output tensor
/// \param[out] out output argument to hold the created Tensor
/// \return Status Code
template <>
inline Status Tensor::CreateFromVector<std::string>(const std::vector<std::string> &items, const TensorShape &shape,
TensorPtr *out) {
CHECK_FAIL_RETURN_UNEXPECTED(
items.size() == shape.NumOfElements(),
"Number of elements in the vector does not match the number of elements of the shape required");
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(items.size())}),
DataType(DataType::DE_STRING));
if (items.size() == 0) {
if (shape.known()) {
return (*out)->Reshape(shape);
}
}
auto length_sum = [](dsize_t sum, const std::string &s) { return s.length() + sum; };
dsize_t total_length = std::accumulate(items.begin(), items.end(), 0, length_sum);
// total bytes needed = offset array + strings
// offset array needs to store one offset var per element + 1 extra to get the length of the last string.
// strings will be null-terminated --> need 1 extra byte per element
dsize_t num_bytes = (kOffsetSize + 1) * (*out)->shape_.NumOfElements() + kOffsetSize + total_length;
(*out)->AllocateBuffer(num_bytes);
auto offset_arr = reinterpret_cast<offset_t *>((*out)->data_);
uchar *buf = (*out)->GetStringsBuffer();
offset_t offset = buf - (*out)->data_; // the first string will start here
uint32_t i = 0;
for (const auto &str : items) {
// insert the start index of the string.
offset_arr[i++] = offset;
// total bytes are reduced by kOffsetSize
num_bytes -= kOffsetSize;
// insert actual string
int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
if (ret_code != 0) MS_LOG(ERROR) << "Cannot copy string into Tensor";
// next string will be stored right after the current one.
offset = offset + str.length() + 1;
// total bytes are reduced by the length of the string
num_bytes -= str.length() + 1;
}
// store one more offset value so we can get the length of the last string
// length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
offset_arr[i] = offset;
(*out)->data_end_ = (*out)->data_ + offset_arr[i];
MS_ASSERT(num_bytes == 0);
if (shape.known()) {
RETURN_IF_NOT_OK((*out)->Reshape(shape));
}
return Status::OK();
}
/// Create a string scalar Tensor from the given value.
/// \param[in] item value
/// \param[out] out Created tensor
/// \return Status code
template <>
inline Status Tensor::CreateScalar<std::string>(const std::string &item, TensorPtr *out) {
return CreateFromVector<std::string>({item}, TensorShape::CreateScalar(), out);
}
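A sketch tying the layout above to the public API (illustrative; assumes GetItemAt on a string tensor yields a view into the buffer rather than a copy):
  std::shared_ptr<Tensor> t;
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(std::vector<std::string>{"abc", "de"}, TensorShape({2}), &t));
  std::string_view sv;
  RETURN_IF_NOT_OK(t->GetItemAt(&sv, {1}));  // sv == "de"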
} // namespace dataset } // namespace dataset
} // namespace mindspore } // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_ #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_

View File

@ -141,8 +141,9 @@ Status BatchFetchRequest::RestoreOneTensor(const TensorMetaMsg *col_ts, const Re
#undef CASE #undef CASE
DataType type(dest); DataType type(dest);
std::shared_ptr<Tensor> ts = std::shared_ptr<Tensor> ts;
std::make_shared<Tensor>(shape, type, static_cast<const unsigned char *>(data.GetPointer()), data.GetSize()); RETURN_IF_NOT_OK(
Tensor::CreateFromMemory(shape, type, static_cast<const unsigned char *>(data.GetPointer()), data.GetSize(), &ts));
// Next we restore the real data which can be embedded or stored separately. // Next we restore the real data which can be embedded or stored separately.
if (ts->SizeInBytes() != data.GetSize()) { if (ts->SizeInBytes() != data.GetSize()) {
MS_LOG(ERROR) << "Unexpected length. Read " << data.GetSize() << ". Expected " << ts->SizeInBytes() << ".\n" MS_LOG(ERROR) << "Unexpected length. Read " << data.GetSize() << ". Expected " << ts->SizeInBytes() << ".\n"

View File

@ -176,12 +176,15 @@ Status BatchOp::BatchRows(const std::unique_ptr<TensorQTable> *src, const std::u
std::shared_ptr<Tensor> new_tensor; std::shared_ptr<Tensor> new_tensor;
if (first_type.IsNumeric()) { // numeric tensor if (first_type.IsNumeric()) { // numeric tensor
RETURN_IF_NOT_OK(Tensor::CreateTensor(&new_tensor, TensorImpl::kFlexible, new_shape, first_type)); RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, first_type, &new_tensor));
dsize_t j = 0; dsize_t j = 0;
for (auto row : **src) { for (auto row : **src) {
std::shared_ptr<Tensor> old_tensor = row.at(i); // row j, column i std::shared_ptr<Tensor> old_tensor = row.at(i); // row j, column i
if (old_tensor->shape() == first_shape) { // check the newly popped rows have the same dim as the first if (old_tensor->shape() == first_shape) { // check the newly popped rows have the same dim as the first
RETURN_IF_NOT_OK(new_tensor->InsertTensor({j++}, old_tensor)); if (new_shape.NumOfElements() != 0) {
RETURN_IF_NOT_OK(new_tensor->InsertTensor({j++}, old_tensor));
}
// Don't do anything if the tensor has no data
} else { } else {
RETURN_STATUS_UNEXPECTED("[Batch ERROR] Inconsistent TensorShapes of Column " + std::to_string(i)); RETURN_STATUS_UNEXPECTED("[Batch ERROR] Inconsistent TensorShapes of Column " + std::to_string(i));
} }
@ -194,7 +197,7 @@ Status BatchOp::BatchRows(const std::unique_ptr<TensorQTable> *src, const std::u
strings.emplace_back(*itr); strings.emplace_back(*itr);
} }
} }
RETURN_IF_NOT_OK(Tensor::CreateTensor(&new_tensor, strings, new_shape)); RETURN_IF_NOT_OK(Tensor::CreateFromVector(strings, new_shape, &new_tensor));
} }
batched_row.emplace_back(new_tensor); batched_row.emplace_back(new_tensor);
} }
@ -352,7 +355,7 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou
py::list output_list = py::cast<py::list>(ret_tuple[i]); py::list output_list = py::cast<py::list>(ret_tuple[i]);
for (size_t j = 0; j < output_list.size(); j++) { for (size_t j = 0; j < output_list.size(); j++) {
std::shared_ptr<Tensor> out; std::shared_ptr<Tensor> out;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, py::cast<py::array>(output_list[j]))); RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(py::cast<py::array>(output_list[j]), &out));
output_batch.push_back(std::move(out)); output_batch.push_back(std::move(out));
} }
output->push_back(std::move(output_batch)); output->push_back(std::move(output_batch));

View File

@ -226,7 +226,8 @@ void CacheMergeOp::TensorRowRequest::WakeUpAny(TensorRow &&row) {
if (GetState() == State::kEmpty) { if (GetState() == State::kEmpty) {
// We will do a deep copy // We will do a deep copy
for (auto &ts : row) { for (auto &ts : row) {
auto out_ts = std::make_shared<Tensor>(ts->shape(), ts->type(), ts->GetBuffer(), ts->SizeInBytes()); std::shared_ptr<Tensor> out_ts;
Tensor::CreateFromTensor(ts, &out_ts);
cleaner_copy_.push_back(out_ts); cleaner_copy_.push_back(out_ts);
} }
cleaner_copy_.setId(row.getId()); cleaner_copy_.setId(row.getId());

View File

@ -72,6 +72,7 @@ Status DeviceQueueOp::CheckExceptions(const std::unique_ptr<DataBuffer> &buffer)
buffer->GetRow(0, &row); buffer->GetRow(0, &row);
for (const auto &item : row) { for (const auto &item : row) {
CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Cannot send tensor of string type to device."); CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Cannot send tensor of string type to device.");
CHECK_FAIL_RETURN_UNEXPECTED(item->HasData(), "Cannot send tensor with no data.");
} }
} }
return Status::OK(); return Status::OK();

View File

@ -359,7 +359,7 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, const std::pair<std::string,
Path path(folder_path_); Path path(folder_path_);
Path image_path = path / image_label.first; Path image_path = path / image_label.first;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, image_path.toString())); RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_path.toString(), &image));
if (decode_ == true) { if (decode_ == true) {
Status rc = Decode(image, &image); Status rc = Decode(image, &image);
if (rc.IsError()) { if (rc.IsError()) {
@ -369,9 +369,8 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, const std::pair<std::string,
} }
} }
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), RETURN_IF_NOT_OK(
TensorShape({1, (uint32_t)image_label.second.size()}), Tensor::CreateEmpty(TensorShape({1, (uint32_t)image_label.second.size()}), data_schema_->column(1).type(), &label));
data_schema_->column(1).type()));
RETURN_IF_NOT_OK(label->Zero()); RETURN_IF_NOT_OK(label->Zero());
for (uint32_t index = 0; index < image_label.second.size(); index++) { for (uint32_t index = 0; index < image_label.second.size(); index++) {
if (image_label.second[index] == 1) { if (image_label.second[index] == 1) {

View File

@ -190,15 +190,12 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) {
std::shared_ptr<Tensor> label; std::shared_ptr<Tensor> label;
std::shared_ptr<Tensor> fine_label; std::shared_ptr<Tensor> fine_label;
std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first; std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first;
std::shared_ptr<Tensor> copy_image = std::shared_ptr<Tensor> copy_image;
std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->GetBuffer()); RETURN_IF_NOT_OK(Tensor::CreateFromTensor(ori_image, &copy_image));
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(), RETURN_IF_NOT_OK(Tensor::CreateScalar(cifar_image_label_pairs_[index].second[0], &label));
data_schema_->column(1).type(),
reinterpret_cast<unsigned char *>(&cifar_image_label_pairs_[index].second[0])));
if (cifar_image_label_pairs_[index].second.size() > 1) { if (cifar_image_label_pairs_[index].second.size() > 1) {
RETURN_IF_NOT_OK(Tensor::CreateTensor( RETURN_IF_NOT_OK(Tensor::CreateScalar(cifar_image_label_pairs_[index].second[1], &fine_label));
&fine_label, data_schema_->column(2).tensorImpl(), data_schema_->column(2).shape(),
data_schema_->column(2).type(), reinterpret_cast<unsigned char *>(&cifar_image_label_pairs_[index].second[1])));
(*trow) = TensorRow(index, {copy_image, std::move(label), std::move(fine_label)}); (*trow) = TensorRow(index, {copy_image, std::move(label), std::move(fine_label)});
} else { } else {
(*trow) = TensorRow(index, {copy_image, std::move(label)}); (*trow) = TensorRow(index, {copy_image, std::move(label)});
@ -359,9 +356,8 @@ Status CifarOp::ParseCifarData() {
} }
std::shared_ptr<Tensor> image_tensor; std::shared_ptr<Tensor> image_tensor;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image_tensor, data_schema_->column(0).tensorImpl(), RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}),
TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}), data_schema_->column(0).type(), &image_tensor));
data_schema_->column(0).type()));
auto itr = image_tensor->begin<uint8_t>(); auto itr = image_tensor->begin<uint8_t>();
uint32_t total_pix = kCifarImageHeight * kCifarImageWidth; uint32_t total_pix = kCifarImageHeight * kCifarImageWidth;
for (int pix = 0; pix < total_pix; ++pix) { for (int pix = 0; pix < total_pix; ++pix) {

View File

@ -127,7 +127,7 @@ Status ClueOp::LoadTensor(const std::string &line, std::unique_ptr<TensorQTable>
(*tensor_table)->push_back(std::move(tRow)); (*tensor_table)->push_back(std::move(tRow));
std::shared_ptr<Tensor> tensor; std::shared_ptr<Tensor> tensor;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, {line}, TensorShape::CreateScalar())); RETURN_IF_NOT_OK(Tensor::CreateScalar(line, &tensor));
(**tensor_table)[row][0] = std::move(tensor); (**tensor_table)[row][0] = std::move(tensor);
return Status::OK(); return Status::OK();
} }
@ -144,26 +144,19 @@ Status ClueOp::GetValue(const nlohmann::json &js, std::vector<std::string> key_c
std::string final_str = key_chain.back(); std::string final_str = key_chain.back();
switch (cursor.type()) { switch (cursor.type()) {
case nlohmann::detail::value_t::string: case nlohmann::detail::value_t::string:
RETURN_IF_NOT_OK(Tensor::CreateTensor(t, {cursor.get<std::string>()}, TensorShape::CreateScalar())); RETURN_IF_NOT_OK(Tensor::CreateScalar(cursor.get<std::string>(), t));
break; break;
case nlohmann::detail::value_t::number_integer: case nlohmann::detail::value_t::number_integer:
RETURN_IF_NOT_OK( RETURN_IF_NOT_OK(Tensor::CreateScalar(cursor.get<int32_t>(), t));
Tensor::CreateTensor(t, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_INT32)));
(*t)->SetItemAt<int32_t>({0}, cursor.get<int32_t>());
break; break;
case nlohmann::detail::value_t::number_unsigned: case nlohmann::detail::value_t::number_unsigned:
RETURN_IF_NOT_OK( RETURN_IF_NOT_OK(Tensor::CreateScalar(cursor.get<uint32_t>(), t));
Tensor::CreateTensor(t, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_INT32)));
(*t)->SetItemAt<int32_t>({0}, cursor.get<uint32_t>());
break; break;
case nlohmann::detail::value_t::number_float: case nlohmann::detail::value_t::number_float:
RETURN_IF_NOT_OK( RETURN_IF_NOT_OK(Tensor::CreateScalar(cursor.get<float>(), t));
Tensor::CreateTensor(t, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32)));
(*t)->SetItemAt<int32_t>({0}, cursor.get<float>());
break; break;
case nlohmann::detail::value_t::array: case nlohmann::detail::value_t::array:
RETURN_IF_NOT_OK(Tensor::CreateTensor(t, {cursor.get<std::vector<std::string>>()}, TensorShape::CreateScalar())); RETURN_IF_NOT_OK(Tensor::CreateFromVector(cursor.get<std::vector<std::string>>(), t));
break; break;
default: default:
break; break;

View File

@ -239,9 +239,8 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, Te
} }
std::vector<dsize_t> bbox_dim = {bbox_row_num, bbox_column_num}; std::vector<dsize_t> bbox_dim = {bbox_row_num, bbox_column_num};
RETURN_IF_NOT_OK(Tensor::CreateTensor(&coordinate, data_schema_->column(1).tensorImpl(), TensorShape(bbox_dim), RETURN_IF_NOT_OK(Tensor::CreateFromVector(bbox_row, TensorShape(bbox_dim), &coordinate));
data_schema_->column(1).type(),
reinterpret_cast<unsigned char *>(&bbox_row[0])));
if (task_type_ == TaskType::Detection) { if (task_type_ == TaskType::Detection) {
RETURN_IF_NOT_OK(LoadDetectionTensorRow(row_id, image_id, image, coordinate, trow)); RETURN_IF_NOT_OK(LoadDetectionTensorRow(row_id, image_id, image, coordinate, trow));
} else if (task_type_ == TaskType::Stuff || task_type_ == TaskType::Keypoint) { } else if (task_type_ == TaskType::Stuff || task_type_ == TaskType::Keypoint) {
@ -278,13 +277,12 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima
iscrowd_row.push_back(annotation[i]); iscrowd_row.push_back(annotation[i]);
} }
} }
RETURN_IF_NOT_OK(Tensor::CreateTensor( RETURN_IF_NOT_OK(Tensor::CreateFromVector(
&category_id, data_schema_->column(2).tensorImpl(), TensorShape({static_cast<dsize_t>(category_id_row.size()), 1}), category_id_row, TensorShape({static_cast<dsize_t>(category_id_row.size()), 1}), &category_id));
data_schema_->column(2).type(), reinterpret_cast<unsigned char *>(&category_id_row[0])));
RETURN_IF_NOT_OK(
Tensor::CreateFromVector(iscrowd_row, TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}), &iscrowd));
RETURN_IF_NOT_OK(Tensor::CreateTensor(
&iscrowd, data_schema_->column(3).tensorImpl(), TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}),
data_schema_->column(3).type(), reinterpret_cast<unsigned char *>(&iscrowd_row[0])));
(*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd)}); (*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd)});
return Status::OK(); return Status::OK();
} }
@ -302,9 +300,8 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_
item_queue = itr_item->second; item_queue = itr_item->second;
std::vector<dsize_t> bbox_dim = {static_cast<dsize_t>(item_queue.size()), 1}; std::vector<dsize_t> bbox_dim = {static_cast<dsize_t>(item_queue.size()), 1};
RETURN_IF_NOT_OK(Tensor::CreateTensor(&item, data_schema_->column(2).tensorImpl(), TensorShape(bbox_dim), RETURN_IF_NOT_OK(Tensor::CreateFromVector(item_queue, TensorShape(bbox_dim), &item));
data_schema_->column(2).type(),
reinterpret_cast<unsigned char *>(&item_queue[0])));
(*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(item)}); (*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(item)});
return Status::OK(); return Status::OK();
} }
@ -334,18 +331,14 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id,
area_row.push_back(annotation[i]); area_row.push_back(annotation[i]);
} }
} }
RETURN_IF_NOT_OK(Tensor::CreateFromVector(
category_id_row, TensorShape({static_cast<dsize_t>(category_id_row.size()), 1}), &category_id));
RETURN_IF_NOT_OK(Tensor::CreateTensor( RETURN_IF_NOT_OK(
&category_id, data_schema_->column(2).tensorImpl(), TensorShape({static_cast<dsize_t>(category_id_row.size()), 1}), Tensor::CreateFromVector(iscrowd_row, TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}), &iscrowd));
data_schema_->column(2).type(), reinterpret_cast<unsigned char *>(&category_id_row[0])));
RETURN_IF_NOT_OK(Tensor::CreateTensor( RETURN_IF_NOT_OK(Tensor::CreateFromVector(area_row, TensorShape({static_cast<dsize_t>(area_row.size()), 1}), &area));
&iscrowd, data_schema_->column(3).tensorImpl(), TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}),
data_schema_->column(3).type(), reinterpret_cast<unsigned char *>(&iscrowd_row[0])));
RETURN_IF_NOT_OK(Tensor::CreateTensor(
&area, data_schema_->column(4).tensorImpl(), TensorShape({static_cast<dsize_t>(area_row.size()), 1}),
data_schema_->column(4).type(), reinterpret_cast<unsigned char *>(&area_row[0])));
(*trow) = TensorRow( (*trow) = TensorRow(
row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd), std::move(area)}); row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd), std::move(area)});
return Status::OK(); return Status::OK();
@ -596,7 +589,7 @@ Status CocoOp::LaunchThreadsAndInitOp() {
} }
Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) { Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, path)); RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));
if (decode_ == true) { if (decode_ == true) {
Status rc = Decode(*tensor, tensor); Status rc = Decode(*tensor, tensor);

View File

@ -102,18 +102,13 @@ int CsvOp::CsvParser::put_record(char c) {
std::shared_ptr<Tensor> t; std::shared_ptr<Tensor> t;
switch (column_default_[cur_col_]->type) { switch (column_default_[cur_col_]->type) {
case CsvOp::INT: case CsvOp::INT:
Tensor::CreateTensor(&t, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_INT32)); Tensor::CreateScalar(std::stoi(s), &t);
t->SetItemAt<int32_t>({0}, std::stoi(s));
break; break;
case CsvOp::FLOAT: case CsvOp::FLOAT:
Tensor::CreateTensor(&t, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32)); Tensor::CreateScalar(std::stof(s), &t);
t->SetItemAt<float>({0}, std::stof(s));
break;
case CsvOp::STRING:
Tensor::CreateTensor(&t, {s}, TensorShape::CreateScalar());
break; break;
default: default:
Tensor::CreateTensor(&t, {s}, TensorShape::CreateScalar()); Tensor::CreateScalar(s, &t);
break; break;
} }
(*tensor_table_)[cur_row_][cur_col_] = std::move(t); (*tensor_table_)[cur_row_][cur_col_] = std::move(t);

View File

@ -129,7 +129,7 @@ Status GeneratorOp::PyRowToTensorRow(py::object py_data, TensorRow *tensor_row)
"Generator should return a tuple of numpy arrays."); "Generator should return a tuple of numpy arrays.");
} }
std::shared_ptr<Tensor> tensor; std::shared_ptr<Tensor> tensor;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, ret_py_ele.cast<py::array>())); RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(ret_py_ele.cast<py::array>(), &tensor));
if ((!column_types_.empty()) && (column_types_[i] != DataType::DE_UNKNOWN) && if ((!column_types_.empty()) && (column_types_[i] != DataType::DE_UNKNOWN) &&
(column_types_[i] != tensor->type())) { (column_types_[i] != tensor->type())) {
return Status(StatusCode::kPyFuncException, __LINE__, __FILE__, "Generator type check failed."); return Status(StatusCode::kPyFuncException, __LINE__, __FILE__, "Generator type check failed.");

View File

@ -201,10 +201,8 @@ Status ImageFolderOp::WorkerEntry(int32_t worker_id) {
// Load 1 TensorRow (image,label) using 1 ImageLabelPair. 1 function call produces 1 TensorRow in a DataBuffer // Load 1 TensorRow (image,label) using 1 ImageLabelPair. 1 function call produces 1 TensorRow in a DataBuffer
Status ImageFolderOp::LoadTensorRow(row_id_type row_id, ImageLabelPair pairPtr, TensorRow *trow) { Status ImageFolderOp::LoadTensorRow(row_id_type row_id, ImageLabelPair pairPtr, TensorRow *trow) {
std::shared_ptr<Tensor> image, label; std::shared_ptr<Tensor> image, label;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(), RETURN_IF_NOT_OK(Tensor::CreateScalar(pairPtr->second, &label));
data_schema_->column(1).type(), RETURN_IF_NOT_OK(Tensor::CreateFromFile(folder_path_ + (pairPtr->first), &image));
reinterpret_cast<unsigned char *>(&pairPtr->second)));
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, folder_path_ + (pairPtr->first)));
if (decode_ == true) { if (decode_ == true) {
Status rc = Decode(image, &image); Status rc = Decode(image, &image);

View File

@ -185,17 +185,14 @@ Status ManifestOp::LoadTensorRow(row_id_type row_id, const std::pair<std::string
std::vector<int32_t> label_index(data.second.size()); std::vector<int32_t> label_index(data.second.size());
(void)std::transform(data.second.begin(), data.second.end(), label_index.begin(), (void)std::transform(data.second.begin(), data.second.end(), label_index.begin(),
[this](const std::string &label_name) { return label_index_[label_name]; }); [this](const std::string &label_name) { return label_index_[label_name]; });
RETURN_IF_NOT_OK(Tensor::CreateFromVector(label_index, &label));
if (label_index.size() == 1) { if (label_index.size() == 1) {
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), TensorShape({}), label->Reshape(TensorShape({}));
data_schema_->column(1).type(),
reinterpret_cast<unsigned char *>(&label_index[0])));
} else { } else {
RETURN_IF_NOT_OK(Tensor::CreateTensor( label->Reshape(TensorShape(std::vector<dsize_t>(1, label_index.size())));
&label, data_schema_->column(1).tensorImpl(), TensorShape(std::vector<dsize_t>(1, label_index.size())),
data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&label_index[0])));
} }
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data.first)); RETURN_IF_NOT_OK(Tensor::CreateFromFile(data.first, &image));
if (decode_ == true) { if (decode_ == true) {
Status rc = Decode(image, &image); Status rc = Decode(image, &image);
if (rc.IsError()) { if (rc.IsError()) {

View File

@ -381,15 +381,15 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint
auto num_elements = n_bytes / column_data_type_size; auto num_elements = n_bytes / column_data_type_size;
if (type == DataType::DE_STRING) { if (type == DataType::DE_STRING) {
std::string s{data, data + n_bytes}; std::string s{data, data + n_bytes};
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, {s}, TensorShape::CreateScalar())); RETURN_IF_NOT_OK(Tensor::CreateScalar(s, &tensor));
} else if (column.hasShape()) { } else if (column.hasShape()) {
auto new_shape = TensorShape(column.shape()); auto new_shape = TensorShape(column.shape());
RETURN_IF_NOT_OK(column.MaterializeTensorShape(static_cast<int32_t>(num_elements), &new_shape)); RETURN_IF_NOT_OK(column.MaterializeTensorShape(static_cast<int32_t>(num_elements), &new_shape));
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, column.tensorImpl(), new_shape, type, data)); RETURN_IF_NOT_OK(Tensor::CreateFromMemory(new_shape, type, data, &tensor));
} else { } else {
std::vector<dsize_t> shapeDetails = {static_cast<dsize_t>(num_elements)}; std::vector<dsize_t> shapeDetails = {static_cast<dsize_t>(num_elements)};
auto new_shape = TensorShape(shapeDetails); auto new_shape = TensorShape(shapeDetails);
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, column.tensorImpl(), new_shape, type, data)); RETURN_IF_NOT_OK(Tensor::CreateFromMemory(new_shape, type, data, &tensor));
} }
tensor_row->push_back(std::move(tensor)); tensor_row->push_back(std::move(tensor));
} }

View File

@ -160,12 +160,10 @@ Status MnistOp::WorkerEntry(int32_t worker_id) {
// Load 1 TensorRow (image,label) using 1 MnistLabelPair. // Load 1 TensorRow (image,label) using 1 MnistLabelPair.
Status MnistOp::LoadTensorRow(row_id_type row_id, const MnistLabelPair &mnist_pair, TensorRow *trow) { Status MnistOp::LoadTensorRow(row_id_type row_id, const MnistLabelPair &mnist_pair, TensorRow *trow) {
std::shared_ptr<Tensor> image, label; std::shared_ptr<Tensor> image, label;
int32_t l = mnist_pair.second;
// make a copy of cached tensor // make a copy of cached tensor
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), mnist_pair.first->shape(), RETURN_IF_NOT_OK(Tensor::CreateFromTensor(mnist_pair.first, &image));
mnist_pair.first->type(), mnist_pair.first->GetBuffer())); RETURN_IF_NOT_OK(Tensor::CreateScalar(mnist_pair.second, &label));
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&l)));
(*trow) = TensorRow(row_id, {std::move(image), std::move(label)}); (*trow) = TensorRow(row_id, {std::move(image), std::move(label)});
return Status::OK(); return Status::OK();
} }
@ -325,8 +323,8 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la
pixels[m] = (pixels[m] == 0) ? 0 : 255; pixels[m] = (pixels[m] == 0) ? 0 : 255;
} }
std::shared_ptr<Tensor> image; std::shared_ptr<Tensor> image;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), img_tensor_shape, RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->column(0).type(),
data_schema_->column(0).type(), reinterpret_cast<unsigned char *>(pixels))); reinterpret_cast<unsigned char *>(pixels), &image));
image_label_pairs_.emplace_back(std::make_pair(image, labels_buf[j])); image_label_pairs_.emplace_back(std::make_pair(image, labels_buf[j]));
} }
return Status::OK(); return Status::OK();

View File

@ -40,7 +40,7 @@ namespace dataset {
template <typename T> template <typename T>
class Queue; class Queue;
using MnistLabelPair = std::pair<std::shared_ptr<Tensor>, int32_t>; using MnistLabelPair = std::pair<std::shared_ptr<Tensor>, uint32_t>;
class MnistOp : public ParallelOp, public RandomAccessOp { class MnistOp : public ParallelOp, public RandomAccessOp {
public: public:

View File

@ -361,8 +361,7 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) {
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor."); return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor.");
} }
RETURN_IF_NOT_OK( RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.type(), buf.get(), &new_tensor));
Tensor::CreateTensor(&new_tensor, current_col.tensorImpl(), *new_shape, current_col.type(), buf.get()));
// Add this tensor to the tensor row for output // Add this tensor to the tensor row for output
(*new_row).push_back(std::move(new_tensor)); (*new_row).push_back(std::move(new_tensor));

View File

@ -41,7 +41,7 @@ Status PythonSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) {
try { try {
py::object py_ret = py_sampler_instance.attr("_get_indices")(); py::object py_ret = py_sampler_instance.attr("_get_indices")();
py::array np_sample_ids = py_ret.cast<py::array>(); py::array np_sample_ids = py_ret.cast<py::array>();
Tensor::CreateTensor(&sample_ids, np_sample_ids); // copy numpy to tensor Tensor::CreateFromNpArray(np_sample_ids, &sample_ids); // copy numpy to tensor
if (HasChildSampler()) { if (HasChildSampler()) {
for (auto it = sample_ids->begin<int64_t>(); it != sample_ids->end<int64_t>(); ++it) { for (auto it = sample_ids->begin<int64_t>(); it != sample_ids->end<int64_t>(); ++it) {

View File

@ -73,9 +73,7 @@ Status Sampler::CreateSamplerTensor(std::shared_ptr<Tensor> *sample_ids, int64_t
col_desc_ = std::make_unique<ColDescriptor>("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1); col_desc_ = std::make_unique<ColDescriptor>("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1);
} }
TensorShape shape(std::vector<dsize_t>(1, num_elements)); TensorShape shape(std::vector<dsize_t>(1, num_elements));
RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, col_desc_->tensorImpl(), shape, col_desc_->type())); RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, col_desc_->type(), sample_ids));
RETURN_IF_NOT_OK(
(*sample_ids)->AllocateBuffer((*sample_ids)->SizeInBytes())); // allocate memory in case user forgets!
return Status::OK(); return Status::OK();
} }

View File

@ -146,7 +146,7 @@ Status TextFileOp::LoadTensor(const std::string &line, std::unique_ptr<TensorQTa
(*tensor_table)->push_back(std::move(tRow)); (*tensor_table)->push_back(std::move(tRow));
std::shared_ptr<Tensor> tensor; std::shared_ptr<Tensor> tensor;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, {line}, TensorShape::CreateScalar())); RETURN_IF_NOT_OK(Tensor::CreateScalar(line, &tensor));
(**tensor_table)[row][0] = std::move(tensor); (**tensor_table)[row][0] = std::move(tensor);
return Status::OK(); return Status::OK();
} }

View File

@ -677,8 +677,7 @@ Status TFReaderOp::LoadFeature(const std::unique_ptr<TensorQTable> *tensor_table
// into the tensor // into the tensor
TensorShape current_shape = TensorShape::CreateUnknownRankShape(); TensorShape current_shape = TensorShape::CreateUnknownRankShape();
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(num_elements, &current_shape)); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(num_elements, &current_shape));
RETURN_IF_NOT_OK( RETURN_IF_NOT_OK(Tensor::CreateFromMemory(current_shape, current_col.type(), data_ptr, &ts));
Tensor::CreateTensor(&ts, current_col.tensorImpl(), current_shape, current_col.type(), data_ptr));
break; break;
} }
case dataengine::Feature::KindCase::kInt64List: { case dataengine::Feature::KindCase::kInt64List: {
@ -735,7 +734,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
if (current_col.type() == DataType::DE_STRING) { if (current_col.type() == DataType::DE_STRING) {
TensorShape shape = TensorShape::CreateScalar(); TensorShape shape = TensorShape::CreateScalar();
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &shape)); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &shape));
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, bytes_list, shape)); RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, shape, tensor));
return Status::OK(); return Status::OK();
} }
@ -763,7 +762,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor &current_col, const dataeng
// know how many elements there are and the total bytes, create tensor here: // know how many elements there are and the total bytes, create tensor here:
TensorShape current_shape = TensorShape::CreateScalar(); TensorShape current_shape = TensorShape::CreateScalar();
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape((*num_elements) * pad_size, &current_shape)); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape((*num_elements) * pad_size, &current_shape));
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, bytes_list, current_shape, current_col.type(), pad_size)); RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, current_shape, current_col.type(), pad_size, tensor));
return Status::OK(); return Status::OK();
} }
@ -836,10 +835,7 @@ Status TFReaderOp::LoadIntList(const ColDescriptor &current_col, const dataengin
// know how many elements there are, create tensor here: // know how many elements there are, create tensor here:
TensorShape current_shape = TensorShape::CreateUnknownRankShape(); TensorShape current_shape = TensorShape::CreateUnknownRankShape();
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &current_shape)); RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &current_shape));
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type())); RETURN_IF_NOT_OK(Tensor::CreateEmpty(current_shape, current_col.type(), tensor));
// Tensors are lazily allocated, this eagerly allocates memory for the tensor.
RETURN_IF_NOT_OK((*tensor)->AllocateBuffer((*tensor)->SizeInBytes()));
int64_t i = 0; int64_t i = 0;
auto it = (*tensor)->begin<T>(); auto it = (*tensor)->begin<T>();

View File

@ -375,7 +375,7 @@ Status VOCOp::LaunchThreadsAndInitOp() {
} }
Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) { Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, path)); RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));
if (decode_ == true) { if (decode_ == true) {
Status rc = Decode(*tensor, tensor); Status rc = Decode(*tensor, tensor);
if (rc.IsError()) { if (rc.IsError()) {
@ -412,18 +412,10 @@ Status VOCOp::ReadAnnotationToTensor(const std::string &path, TensorRow *row) {
bbox_num++; bbox_num++;
} }
} }
RETURN_IF_NOT_OK(Tensor::CreateTensor(&bbox, data_schema_->column(1).tensorImpl(), TensorShape({bbox_num, 4}), RETURN_IF_NOT_OK(Tensor::CreateFromVector(bbox_data, TensorShape({bbox_num, 4}), &bbox));
data_schema_->column(1).type(), RETURN_IF_NOT_OK(Tensor::CreateFromVector(label_data, TensorShape({bbox_num, 1}), &label));
reinterpret_cast<unsigned char *>(&bbox_data[0]))); RETURN_IF_NOT_OK(Tensor::CreateFromVector(difficult_data, TensorShape({bbox_num, 1}), &difficult));
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(2).tensorImpl(), TensorShape({bbox_num, 1}), RETURN_IF_NOT_OK(Tensor::CreateFromVector(truncate_data, TensorShape({bbox_num, 1}), &truncate));
data_schema_->column(2).type(),
reinterpret_cast<unsigned char *>(&label_data[0])));
RETURN_IF_NOT_OK(Tensor::CreateTensor(&difficult, data_schema_->column(3).tensorImpl(), TensorShape({bbox_num, 1}),
data_schema_->column(3).type(),
reinterpret_cast<unsigned char *>(&difficult_data[0])));
RETURN_IF_NOT_OK(Tensor::CreateTensor(&truncate, data_schema_->column(4).tensorImpl(), TensorShape({bbox_num, 1}),
data_schema_->column(4).type(),
reinterpret_cast<unsigned char *>(&truncate_data[0])));
(*row) = TensorRow({std::move(bbox), std::move(label), std::move(difficult), std::move(truncate)}); (*row) = TensorRow({std::move(bbox), std::move(label), std::move(difficult), std::move(truncate)});
return Status::OK(); return Status::OK();
} }

View File

@ -57,8 +57,7 @@ Status Graph::CreateTensorByVector(const std::vector<std::vector<T>> &data, Data
std::shared_ptr<Tensor> tensor; std::shared_ptr<Tensor> tensor;
size_t m = data.size(); size_t m = data.size();
size_t n = data[0].size(); size_t n = data[0].size();
RETURN_IF_NOT_OK(Tensor::CreateTensor( RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({static_cast<dsize_t>(m), static_cast<dsize_t>(n)}), type, &tensor));
&tensor, TensorImpl::kFlexible, TensorShape({static_cast<dsize_t>(m), static_cast<dsize_t>(n)}), type, nullptr));
auto ptr = tensor->begin<T>(); auto ptr = tensor->begin<T>();
for (const auto &id_m : data) { for (const auto &id_m : data) {
CHECK_FAIL_RETURN_UNEXPECTED(id_m.size() == n, "Each member of the vector has a different size"); CHECK_FAIL_RETURN_UNEXPECTED(id_m.size() == n, "Each member of the vector has a different size");
@ -310,8 +309,7 @@ Status Graph::GetNodeFeature(const std::shared_ptr<Tensor> &nodes, const std::ve
dsize_t size = std::accumulate(shape_vec.begin(), shape_vec.end(), 1, std::multiplies<dsize_t>()); dsize_t size = std::accumulate(shape_vec.begin(), shape_vec.end(), 1, std::multiplies<dsize_t>());
shape = shape.PrependDim(size); shape = shape.PrependDim(size);
std::shared_ptr<Tensor> fea_tensor; std::shared_ptr<Tensor> fea_tensor;
RETURN_IF_NOT_OK( RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, default_feature->Value()->type(), &fea_tensor));
Tensor::CreateTensor(&fea_tensor, TensorImpl::kFlexible, shape, default_feature->Value()->type(), nullptr));
dsize_t index = 0; dsize_t index = 0;
for (auto node_itr = nodes->begin<NodeIdType>(); node_itr != nodes->end<NodeIdType>(); ++node_itr) { for (auto node_itr = nodes->begin<NodeIdType>(); node_itr != nodes->end<NodeIdType>(); ++node_itr) {
@ -358,8 +356,7 @@ Status Graph::GetEdgeFeature(const std::shared_ptr<Tensor> &edges, const std::ve
dsize_t size = std::accumulate(shape_vec.begin(), shape_vec.end(), 1, std::multiplies<dsize_t>()); dsize_t size = std::accumulate(shape_vec.begin(), shape_vec.end(), 1, std::multiplies<dsize_t>());
shape = shape.PrependDim(size); shape = shape.PrependDim(size);
std::shared_ptr<Tensor> fea_tensor; std::shared_ptr<Tensor> fea_tensor;
RETURN_IF_NOT_OK( RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, default_feature->Value()->type(), &fea_tensor));
Tensor::CreateTensor(&fea_tensor, TensorImpl::kFlexible, shape, default_feature->Value()->type(), nullptr));
dsize_t index = 0; dsize_t index = 0;
for (auto edge_itr = edges->begin<EdgeIdType>(); edge_itr != edges->end<EdgeIdType>(); ++edge_itr) { for (auto edge_itr = edges->begin<EdgeIdType>(); edge_itr != edges->end<EdgeIdType>(); ++edge_itr) {

View File

@ -125,7 +125,7 @@ Status GraphLoader::LoadNode(const std::vector<uint8_t> &col_blob, const mindrec
(*feature_map)[node_type].insert(ind); (*feature_map)[node_type].insert(ind);
if ((*default_feature)[ind] == nullptr) { if ((*default_feature)[ind] == nullptr) {
std::shared_ptr<Tensor> zero_tensor; std::shared_ptr<Tensor> zero_tensor;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&zero_tensor, TensorImpl::kFlexible, tensor->shape(), tensor->type())); RETURN_IF_NOT_OK(Tensor::CreateEmpty(tensor->shape(), tensor->type(), &zero_tensor));
RETURN_IF_NOT_OK(zero_tensor->Zero()); RETURN_IF_NOT_OK(zero_tensor->Zero());
(*default_feature)[ind] = std::make_shared<Feature>(ind, zero_tensor); (*default_feature)[ind] = std::make_shared<Feature>(ind, zero_tensor);
} }
@ -151,7 +151,7 @@ Status GraphLoader::LoadEdge(const std::vector<uint8_t> &col_blob, const mindrec
(*feature_map)[edge_type].insert(ind); (*feature_map)[edge_type].insert(ind);
if ((*default_feature)[ind] == nullptr) { if ((*default_feature)[ind] == nullptr) {
std::shared_ptr<Tensor> zero_tensor; std::shared_ptr<Tensor> zero_tensor;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&zero_tensor, TensorImpl::kFlexible, tensor->shape(), tensor->type())); RETURN_IF_NOT_OK(Tensor::CreateEmpty(tensor->shape(), tensor->type(), &zero_tensor));
RETURN_IF_NOT_OK(zero_tensor->Zero()); RETURN_IF_NOT_OK(zero_tensor->Zero());
(*default_feature)[ind] = std::make_shared<Feature>(ind, zero_tensor); (*default_feature)[ind] = std::make_shared<Feature>(ind, zero_tensor);
} }
@ -170,9 +170,9 @@ Status GraphLoader::LoadFeatureTensor(const std::string &key, const std::vector<
key, col_blob, col_jsn, &data, &data_ptr, &n_bytes, &col_type, &col_type_size, &column_shape); key, col_blob, col_jsn, &data, &data_ptr, &n_bytes, &col_type, &col_type_size, &column_shape);
CHECK_FAIL_RETURN_UNEXPECTED(rs == mindrecord::SUCCESS, "fail to load column " + key); CHECK_FAIL_RETURN_UNEXPECTED(rs == mindrecord::SUCCESS, "fail to load column " + key);
if (data == nullptr) data = reinterpret_cast<const unsigned char *>(&data_ptr[0]); if (data == nullptr) data = reinterpret_cast<const unsigned char *>(&data_ptr[0]);
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, TensorImpl::kFlexible, RETURN_IF_NOT_OK(Tensor::CreateFromMemory(std::move(TensorShape({static_cast<dsize_t>(n_bytes / col_type_size)})),
std::move(TensorShape({static_cast<dsize_t>(n_bytes / col_type_size)})), std::move(DataType(mindrecord::ColumnDataTypeNameNormalized[col_type])),
std::move(DataType(mindrecord::ColumnDataTypeNameNormalized[col_type])), data)); data, tensor));
return Status::OK(); return Status::OK();
} }
@ -33,6 +33,7 @@
#include "pybind11/stl.h" #include "pybind11/stl.h"
#endif #endif
#include "common/utils.h"
#include "minddata/dataset/core/constants.h" #include "minddata/dataset/core/constants.h"
#include "minddata/dataset/core/data_type.h" #include "minddata/dataset/core/data_type.h"
#include "minddata/dataset/core/tensor_shape.h" #include "minddata/dataset/core/tensor_shape.h"
@ -50,170 +51,155 @@ class Allocator;
using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>; using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>;
using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>; // An allocator shared_ptr for Tensors using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>; // An allocator shared_ptr for Tensors
using offset_t = uint32_t; // type of offset values to store string locations
using TensorPtr = std::shared_ptr<Tensor>;
class Tensor { class Tensor {
public: public:
Tensor() = delete; Tensor() = delete;
// Create a new tensor, does not internally allocate storage. This constructor is protected, use CreateTensor.
// @note The shape and type information should be known and valid.
// @param shape TensorShape
// @param type DataType
Tensor(const TensorShape &shape, const DataType &type);
// Create a new tensor, allocates storage and copies in data. This constructor is protected, use CreateTensor.
// @note The buffer should be valid and the shape and type information should be known and valid.
// @param shape TensorShape
// @param type DataType
// @param data unsigned char*, pointer to the data.
Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data);
Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data, const dsize_t &length);
Tensor(const Tensor &other) = delete; Tensor(const Tensor &other) = delete;
Tensor &operator=(const Tensor &other) = delete; Tensor &operator=(const Tensor &other) = delete;
/// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead
/// \note The shape and type information should be known and valid
/// \note The constructor does not allocate data
/// \param shape TensorShape
/// \param type DataType
Tensor(const TensorShape &shape, const DataType &type);
/// Move constructor
/// \param other Tensor to be moved
Tensor(Tensor &&other) noexcept; Tensor(Tensor &&other) noexcept;
/// Move assignment operator
/// \param other Tensor to be moved
Tensor &operator=(Tensor &&other) noexcept; Tensor &operator=(Tensor &&other) noexcept;
Status AllocateBuffer(const dsize_t &length); /// Create a numeric tensor with type and shape. Items of the tensor will be uninitialized.
/// \param[in] shape shape of the output tensor
/// \param[in] type type of the output tensor
/// \param[out] out Generated tensor
/// \return Status code
static Status CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out);
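// A minimal usage sketch (not from this diff), assuming a Status-propagating caller:
// the Status-returning factory replaces the old CreateTensor(&t, TensorImpl::kFlexible, ...) call sites.
//   std::shared_ptr<Tensor> t;
//   RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({2, 3}), DataType(DataType::DE_FLOAT32), &t));
//   RETURN_IF_NOT_OK(t->Zero());  // contents are uninitialized until written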
// type of offset values to store string information /// Create a numeric tensor from a pointer in memory. Length of the source data is determined from the shape and type.
using offset_t = uint32_t; /// Data will be copied into the new created tensor.
// const of the size of the offset variable /// \param[in] shape shape of the output tensor
static constexpr uint8_t kOffsetSize = sizeof(offset_t); /// \param[in] type type of the output tensor
// Tensor base class which holds the data in an unsigned char* buffer. /// \param[in] src pointer to the source data
/// \param[out] out Generated tensor
/// \return Status code
static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out);
// Construct a scalar string Tensor /// Create a tensor from a pointer in memory and length. Data will be copied into the new created tensor.
explicit Tensor(const std::string &str) : Tensor(std::vector<std::string>{str}, TensorShape::CreateScalar()) {} /// \param[in] shape shape of the output tensor
/// \param[in] type type of the output tensor
/// \param[in] src pointer to the source data
/// \param[in] length length of the src data
/// \param[out] out Generated tensor
/// \return Status code
static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src,
const dsize_t &length, TensorPtr *out);
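// A sketch (not from this diff) of copying a plain C array into a tensor; in the shorter
// overload above, the byte length is implied by the shape and type.
//   uint8_t pixels[6] = {0, 1, 2, 3, 4, 5};
//   std::shared_ptr<Tensor> t;
//   RETURN_IF_NOT_OK(Tensor::CreateFromMemory(TensorShape({2, 3}), DataType(DataType::DE_UINT8), pixels, &t));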
// Construct a tensor from a list of strings. Reshape the tensor with `shape` if given, otherwise assume the shape is /// Create a copy of the input tensor
// the size of the vector `strings`. /// \param[in] in original tensor to be copied
// The memory layout of a Tensor of strings consists of the Offset_array followed by the strings. /// \param[out] out output tensor to be generated
// The offset array will store one extra value to find the length of the last string. /// \return Status
// OFFSET1, OFFSET2, ..., OFFSETn+1, STRING1, STRING2, ..., STRINGn static Status CreateFromTensor(const TensorPtr &in, TensorPtr *out) {
// The value of each offset is the start index of the corresponding string return CreateFromMemory(in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes(), out);
// Offsets are of type offset_t
// strings will be null-terminated
// example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
// |----------------------------------------------------------------|
// | OFFSET ARRAY | STRINGS |
// | bytes 0-3 | bytes 3-6 | bytes 7-10 | bytes 11-14 | bytes 15-17 |
// | 11 | 15 | 18 | abc\0 | de\0 |
// |----------------------------------------------------------------|
explicit Tensor(const std::vector<std::string> &strings,
const TensorShape &shape = TensorShape::CreateUnknownRankShape());
// Same as Tensor(vector<string>) but the input is protobuf bytelist
explicit Tensor(const dataengine::BytesList &bytes_list,
const TensorShape &shape = TensorShape::CreateUnknownRankShape());
// A static factory method to create the given flavour of derived Tensor
// Returns the base class reference for the Tensor.
// @param ptr output argument to hold the created Tensor of given tensor_impl
// @param tensor_impl - which implementation of Tensor
// @param shape - shape of the tensor
// @param type - datatype of the tensor
// @param data - data to be copied to Tensor new allocation
// @return Status Code
static Status CreateTensor(std::shared_ptr<Tensor> *, TensorImpl tensor_impl, const TensorShape &shape, DataType type,
const unsigned char *data = nullptr);
// Create a copy of the input tensor
// @param out [out] output tensor to be generated
// @param in [in] original tensor to be copied /// \return Status
// @return Status
static Status CreateTensor(std::shared_ptr<Tensor> *out, const std::shared_ptr<Tensor> &in) {
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes());
return Status::OK();
} }
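// A sketch (not from this diff): CreateFromTensor is the supported way to deep-copy, since the
// copy constructor and copy assignment are deleted above. `original` is a hypothetical tensor.
//   std::shared_ptr<Tensor> copy;
//   RETURN_IF_NOT_OK(Tensor::CreateFromTensor(original, &copy));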
#ifdef ENABLE_PYTHON #ifdef ENABLE_PYTHON
// A static factory method to create a Tensor from a given py::array. /// Create a Tensor from a given py::array
// @param ptr output argument to hold the created Tensor /// \param[in] arr py::array
// @param arr py::array /// \param[out] out Created tensor
// @return Status Code /// \return Status Code
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr); static Status CreateFromNpArray(const py::array &arr, TensorPtr *out);
// Helper function to create a tensor from Numpy of strings
static Status CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr);
#endif #endif
// A static factory method to create a Tensor from a given list of strings. /// Create a tensor of type DE_STRING from a BytesList.
// @param ptr output argument to hold the created Tensor /// \param[in] bytes_list protobuf's Bytelist
// @param strings elements of the tensor /// \param[in] shape shape of the output tensor
// @param shape shape of the tensor /// \param[out] out created Tensor
// @return Status Code /// \return Status Code
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings, static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out);
const TensorShape &shape = TensorShape::CreateUnknownRankShape());
// create tensor from protobuf bytelist with strings /// Create a tensor of type UINT8 or INT8 from a BytesList.
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list, /// The tensor will be padded with ' ' to reach the required pad_size.
const TensorShape &shape); /// \param[in] bytes_list protobuf's Bytelist
/// \param[in] shape shape of the output tensor
/// \param[in] type type of created tensor. Should be DE_UINT8 or INT8
/// \param[in] pad_size The size of the tensor after padding
/// \param[out] out created Tensor
/// \return Status Code
static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
const DataType &type, dsize_t pad_size, TensorPtr *out);
// A static factory method to create a Tensor from a given list of numbers. /// Create a Tensor from a given list of values.
// @param ptr output argument to hold the created Tensor /// \tparam T type of the values to be inserted.
// @param items elements of the tensor /// \param[in] items elements of the tensor
// @param shape shape of the tensor /// \param[in] shape shape of the output tensor
// @return Status Code /// \param[out] out output argument to hold the created Tensor
/// \return Status Code
template <typename T> template <typename T>
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<T> &items, static Status CreateFromVector(const std::vector<T> &items, const TensorShape &shape, TensorPtr *out) {
const TensorShape &shape_req = TensorShape::CreateUnknownRankShape()) { CHECK_FAIL_RETURN_UNEXPECTED(
items.size() == shape.NumOfElements(),
"Number of elements in the vector does not match the number of elements of the shape required");
DataType type = DataType::FromCType<T>(); DataType type = DataType::FromCType<T>();
// if items is empty, items_ptr would be nullptr. CreateFromMemory will handle this case.
auto items_ptr = reinterpret_cast<const uchar *>(&items[0]); auto items_ptr = reinterpret_cast<const uchar *>(&items[0]);
TensorShape shape = shape_req; return CreateFromMemory(shape, type, items_ptr, out);
if (!shape.known()) {
shape = TensorShape({static_cast<dsize_t>(items.size())});
}
return CreateTensor(ptr, TensorImpl::kFlexible, shape, type, items_ptr);
} }
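// A sketch (not from this diff): the shape must account for every vector element,
// e.g. six values laid out as 2x3.
//   std::vector<int32_t> v = {1, 2, 3, 4, 5, 6};
//   std::shared_ptr<Tensor> t;
//   RETURN_IF_NOT_OK(Tensor::CreateFromVector(v, TensorShape({2, 3}), &t));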
// A static factory method to create a Tensor from a given number. /// Create a 1D Tensor from a given list of values.
// @param ptr output argument to hold the created Tensor /// \tparam T type of the values to be inserted.
// @param item value /// \param[in] items elements of the tensor
// @return Status Code /// \param[out] out output argument to hold the created Tensor
/// \return Status Code
template <typename T> template <typename T>
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const T &item) { static Status CreateFromVector(const std::vector<T> &items, TensorPtr *out) {
return CreateTensor<T>(ptr, {item}, TensorShape::CreateScalar()); return CreateFromVector(items, TensorShape({static_cast<dsize_t>(items.size())}), out);
} }
// Create tensor from protobuf bytelist with uint8 or int8 types /// Create a numeric scalar Tensor from the given value.
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list, /// \tparam T type of value
const TensorShape &shape, const DataType &type, dsize_t pad_size); /// \param[in] item value
/// \param[out] out Created tensor
/// \return Status code
template <typename T>
static Status CreateScalar(const T &item, TensorPtr *out) {
DataType type = DataType::FromCType<T>();
auto item_ptr = reinterpret_cast<const uchar *>(&item);
return CreateFromMemory(TensorShape::CreateScalar(), type, item_ptr, out);
}
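// A sketch (not from this diff): the DataType is inferred from the C type via
// DataType::FromCType, so this yields a DE_FLOAT32 scalar.
//   std::shared_ptr<Tensor> s;
//   RETURN_IF_NOT_OK(Tensor::CreateScalar(0.5f, &s));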
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::string &path); /// Create a tensor from a binary file on disk.
/// \param[in] path file to be read
/// \param[out] out Created Tensor
/// \return Status code
static Status CreateFromFile(const std::string &path, TensorPtr *out);
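// A sketch (not from this diff), assuming CreateFromFile reads the raw file bytes into a
// tensor, e.g. an encoded image that a decode op consumes downstream. The path is illustrative.
//   std::shared_ptr<Tensor> raw;
//   RETURN_IF_NOT_OK(Tensor::CreateFromFile("/path/to/image.jpg", &raw));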
// Copy raw data of an array based on shape and strides to the destination pointer /// Destruct the tensor and release the memory using the allocator
// @param dst Pointer to the destination array where the content is to be copied
// @param src Pointer to the source of strided array to be copied
// @param shape - shape of the source array
// @param strides - strides of the source array
// @param type_size - number of bytes needed to store one array element's type
// @return Status Code
static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
std::vector<dsize_t> strides, uint8_t type_size);
// Release the memory using the allocator
virtual ~Tensor(); virtual ~Tensor();
// compare the tensor shape and data /// Equality operator. Compares tensor shape, type and data
/// \param[in] rhs Tensor to be compared with
/// \return bool
bool operator==(const Tensor &rhs) const; bool operator==(const Tensor &rhs) const;
bool operator!=(const Tensor &rhs) const { return !((*this) == rhs); } bool operator!=(const Tensor &rhs) const { return !((*this) == rhs); }
// Get item located at `index`, caller needs to provide the type. /// Get item located at `index`, caller needs to provide the type.
// @tparam T /// \tparam T
// @param index vector<dsize_t> /// \param[in] index vector<dsize_t>
// @return return the item specified at index /// \return return the item specified at index
template <typename T> template <typename T>
Status GetItemAt(T *o, const std::vector<dsize_t> &index) const; Status GetItemAt(T *o, const std::vector<dsize_t> &index) const;
// Get string located at `index`. /// Get string located at `index`.
// @param index vector<dsize_t> /// \param[in] index vector<dsize_t>
// @return return std::string_view specified at index /// \return return std::string_view specified at index
Status GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const; Status GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const;
template <typename T> template <typename T>
@ -225,22 +211,21 @@ class Tensor {
template <typename T> template <typename T>
Status GetFloatAt(T *o, const std::vector<dsize_t> &index) const; Status GetFloatAt(T *o, const std::vector<dsize_t> &index) const;
// set item at location specified by index /// set item at location specified by index
// @tparam `T` /// \tparam `T`
// @param index /// \param[in] index
// @param value of type `T` /// \param[in] value of type `T`
template <typename T> template <typename T>
Status SetItemAt(const std::vector<dsize_t> &index, const T &value) { Status SetItemAt(const std::vector<dsize_t> &index, const T &value) {
RETURN_IF_NOT_OK(AllocateBuffer(SizeInBytes()));
T *ptr = nullptr; T *ptr = nullptr;
RETURN_IF_NOT_OK(GetItemPtr<T>(&ptr, index)); RETURN_IF_NOT_OK(GetItemPtr<T>(&ptr, index));
*ptr = value; *ptr = value;
return Status::OK(); return Status::OK();
} }
// set string item at location specified by index /// set string item at location specified by index
// @param index /// \param[in] index
// @param value of type std::string /// \param[in] value of type std::string
Status SetItemAt(const std::vector<dsize_t> &index, const std::string &value) { Status SetItemAt(const std::vector<dsize_t> &index, const std::string &value) {
RETURN_UNEXPECTED_IF_NULL(data_); RETURN_UNEXPECTED_IF_NULL(data_);
uchar *ptr = nullptr; uchar *ptr = nullptr;
@ -253,7 +238,8 @@ class Tensor {
return Status::OK(); return Status::OK();
} }
// fill tensor with Zeros. Does not support strings.
/// fill tensor with Zeros. Does not support strings.
Status Zero() { Status Zero() {
CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use Zero on tensor of strings."); CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use Zero on tensor of strings.");
dsize_t size = SizeInBytes(); dsize_t size = SizeInBytes();
@ -262,13 +248,12 @@ class Tensor {
return Status::OK(); return Status::OK();
} }
// Fill all elements in the Tensor with the given value of type `T`. Does not support strings. /// Fill all elements in the Tensor with the given value of type `T`. Does not support strings.
// @tparam T /// \tparam T
// @param value /// \param[in] value
template <typename T> template <typename T>
Status Fill(const T &value) { Status Fill(const T &value) {
CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use fill on tensor of strings."); CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use fill on tensor of strings.");
RETURN_IF_NOT_OK(AllocateBuffer(SizeInBytes()));
int64_t cellSize = type_.SizeInBytes(); int64_t cellSize = type_.SizeInBytes();
if ((data_ != nullptr) && type_.IsCompatible<T>()) { if ((data_ != nullptr) && type_.IsCompatible<T>()) {
for (dsize_t i = 0; i < Size(); i++) { for (dsize_t i = 0; i < Size(); i++) {
@ -283,91 +268,86 @@ class Tensor {
} }
} }
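// A sketch (not from this diff) of initializing a freshly created numeric tensor `t`:
//   RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({4}), DataType(DataType::DE_INT32), &t));
//   RETURN_IF_NOT_OK(t->Fill<int32_t>(-1));  // would fail on a DE_STRING tensor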
// Getter function for shape /// Getter function for shape
// @return /// \return
const TensorShape &shape() const { return shape_; } const TensorShape &shape() const { return shape_; }
/// Check if tensor has data /// Check if tensor has data
/// \return bool - true if tensor is empty /// \return bool - true if tensor is empty
bool HasData() const; bool HasData() const { return data_ != nullptr; }
// Reshape the tensor. The given shape should have the same number of elements as the Tensor /// Reshape the tensor. The given shape should have the same number of elements as the Tensor
// @param shape /// \param shape
virtual Status Reshape(const TensorShape &shape); virtual Status Reshape(const TensorShape &shape);
// @return number of elements in this tensor /// \return number of elements in this tensor
dsize_t Size() const { return shape().NumOfElements(); } dsize_t Size() const { return shape().NumOfElements(); }
// @return the number of bytes this tensor needs /// \return the number of bytes this tensor needs
dsize_t SizeInBytes() const { dsize_t SizeInBytes() const {
if (data_end_ == nullptr) return type_.SizeInBytes() * shape_.NumOfElements(); if (data_end_ == nullptr) return type_.SizeInBytes() * shape_.NumOfElements();
return data_end_ - data_; return data_end_ - data_;
} }
// @return the rank of the tensor /// \return the rank of the tensor
dsize_t Rank() const { return shape().Rank(); } dsize_t Rank() const { return shape().Rank(); }
// Get the starting memory address as a constant for the data of the tensor. This potentially /// Get the starting memory address as a constant for the data of the tensor. This potentially
// drives an allocation if the data area is null. /// drives an allocation if the data area is null.
// @return const unsigned char* /// \return const unsigned char*
const unsigned char *GetBuffer() const; const unsigned char *GetBuffer() const { return data_; }
// Skip the offsets and returns the start of the buffer where the real strings is stored. Caller needs to check if the /// Getter of the type
// tensor's type is a string, otherwise undefined address would be returned. /// \return
// @return address of the first string of the tensor.
uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; }
// Getter of the type
// @return
DataType type() const { return type_; } DataType type() const { return type_; }
// Provide stream operator for displaying it /// Provide stream operator for displaying it
// @param output stream /// \param output stream
// @param so the Tensor object to be printed /// \param so the Tensor object to be printed
// @return output stream /// \return output stream
friend std::ostream &operator<<(std::ostream &out, const Tensor &so) { friend std::ostream &operator<<(std::ostream &out, const Tensor &so) {
so.Print(out); so.Print(out);
return out; return out;
} }
// Invalidate this Tensor by setting the type and shape to unknown and data to null. /// Invalidate this Tensor by setting the type and shape to unknown and data to null.
// Calling this method will make the Tensor and its data inaccessible, use it with caution. /// Calling this method will make the Tensor and its data inaccessible, use it with caution.
void Invalidate(); void Invalidate();
// Copy input tensor into self at the location index. /// Copy input tensor into self at the location index.
// Index is a vector of axes which can be incomplete: /// Index is a vector of axes which can be incomplete:
// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell. /// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell.
// @param index /// \param index
// @param input /// \param input
// @return Status code /// \return Status code
Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input); Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input);
// Find the address of the given index. Used in InsertTensor. /// Find the address of the given index. Used in InsertTensor.
// Example: /// Example:
// Tensor t= [[1,2],[3,4]] , StartAddrOfIndex({0}) -> &1 /// Tensor t= [[1,2],[3,4]] , StartAddrOfIndex({0}) -> &1
// @param index incomplete index /// \param index incomplete index
// @param output: startAddrofIndex /// \param output: startAddrofIndex
// @param output: remaining /// \param output: remaining
// @return Status code /// \return Status code
Status StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining); Status StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining);
// Expand the shape of the Tensor with one extra dimension. /// Expand the shape of the Tensor with one extra dimension.
// For example, if the shape is <512,512,3>: /// For example, if the shape is <512,512,3>:
// *- ExpandDim(0) gives: <1,512,512,3> /// *- ExpandDim(0) gives: <1,512,512,3>
// *- ExpandDim(1) gives: <512,1,512,3> /// *- ExpandDim(1) gives: <512,1,512,3>
// *- ExpandDim(3) gives: <512,512,3,1> /// *- ExpandDim(3) gives: <512,512,3,1>
// @param axis location of the dim /// \param axis location of the dim
virtual Status ExpandDim(const dsize_t &axis); virtual Status ExpandDim(const dsize_t &axis);
virtual void Squeeze(); virtual void Squeeze();
// Calculates the strides of the Tensor /// Calculates the strides of the Tensor
// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte) /// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
// The strides will be {4,2,1}. /// The strides will be {4,2,1}.
// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte) /// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 byte)
// The strides will be {16,8,4}. /// The strides will be {16,8,4}.
// @return vector of integers /// \return vector of integers
std::vector<dsize_t> Strides(); std::vector<dsize_t> Strides() const;
std::string ToString() { std::string ToString() {
std::stringstream ss; std::stringstream ss;
@ -375,26 +355,26 @@ class Tensor {
return ss.str(); return ss.str();
} }
// Handle negative indices. /// Handle negative indices.
static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; } static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; }
// Slice tensor based on the given indices. Copy the sliced data into the out tensor. Only rank-1 tensors are supported. /// Slice tensor based on the given indices. Copy the sliced data into the out tensor. Only rank-1 tensors are supported.
// Based on the type of tensor, SliceNumeric or SliceString will be called /// Based on the type of tensor, SliceNumeric or SliceString will be called
// @param out Tensor /// \param[out] out Tensor
// @param indices vector of indices /// \param[in] indices vector of indices
// @return Status error code /// \return Status error code
Status Slice(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices); Status Slice(TensorPtr *out, const std::vector<dsize_t> &indices);
// Slice numeric tensors. /// Slice numeric tensors.
Status SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices); Status SliceNumeric(TensorPtr *out, const std::vector<dsize_t> &indices);
// Slice string tensors /// Slice string tensors
Status SliceString(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices); Status SliceString(TensorPtr *out, const std::vector<dsize_t> &indices);
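// A sketch (not from this diff) of keeping selected elements of a hypothetical rank-1 tensor
// `t`; a negative index counts from the end via HandleNeg.
//   std::shared_ptr<Tensor> sliced;
//   RETURN_IF_NOT_OK(t->Slice(&sliced, std::vector<dsize_t>{0, 2, -1}));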
#ifdef ENABLE_PYTHON #ifdef ENABLE_PYTHON
// Constructs numpy array from input tensor /// Constructs numpy array from input tensor
// @param data this data is the location of python data /// \param[out] data numpy array that receives the tensor's data
// @return Status code /// \return Status code
Status GetDataAsNumpy(py::array *data); Status GetDataAsNumpy(py::array *data);
Status GetDataAsNumpyStrings(py::array *data); Status GetDataAsNumpyStrings(py::array *data);
@ -402,12 +382,12 @@ class Tensor {
static Status GetBufferInfo(Tensor *t, py::buffer_info *out); static Status GetBufferInfo(Tensor *t, py::buffer_info *out);
#endif #endif
// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor /// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor
Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input); Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input);
// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor /// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor
// The order of elements is as the memory layout (i.e., row-major): [[1,2,3],[4,5,6]] --> 1,2,3,4,5,6 /// The order of elements is as the memory layout (i.e., row-major): [[1,2,3],[4,5,6]] --> 1,2,3,4,5,6
// @tparam T type of values in the Tensor Iterator /// \tparam T type of values in the Tensor Iterator
template <typename T, bool = true> template <typename T, bool = true>
class TensorIterator { class TensorIterator {
public: public:
@ -498,7 +478,7 @@ class Tensor {
}; };
// Specialization of TensorIterator for strings. It returns std::string_view for every item. // Specialization of TensorIterator for strings. It returns std::string_view for every item.
// @tparam DUMMY, used to be able to specialize the inner class // \tparam DUMMY, used to be able to specialize the inner class
template <bool DUMMY> template <bool DUMMY>
class TensorIterator<std::string_view, DUMMY> { class TensorIterator<std::string_view, DUMMY> {
public: public:
@ -585,84 +565,192 @@ class Tensor {
const char *data_; const char *data_;
}; };
// Return a TensorIterator that points to the start of the Tensor. /// Return a TensorIterator that points to the start of the Tensor.
// It's the user's responsibility to use the correct type that matches the Tensor type /// It's the user's responsibility to use the correct type that matches the Tensor type
// @param T The type of values in the Tensor /// \tparam T The type of values in the Tensor
// @return TensorIterator /// \return TensorIterator
template <typename T> template <typename T>
TensorIterator<T> begin() { TensorIterator<T> begin() {
AllocateBuffer(SizeInBytes());
return TensorIterator<T>(data_); return TensorIterator<T>(data_);
} }
// Return a linear iterator that points to the place after the last element of the Tensor. /// Return a linear iterator that points to the place after the last element of the Tensor.
// @tparam T The type of values in the Tensor /// \tparam T The type of values in the Tensor
// @return TensorIterator /// \return TensorIterator
template <typename T> template <typename T>
TensorIterator<T> end() { TensorIterator<T> end() {
return TensorIterator<T>(data_end_); return TensorIterator<T>(data_end_);
} }
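// A sketch (not from this diff) of linear iteration over a hypothetical tensor `t` in
// row-major order; matching the element type is the caller's responsibility, as noted above.
//   float sum = 0.0f;
//   for (auto it = t->begin<float>(); it != t->end<float>(); ++it) sum += *it;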
// Copies the last dimension at `index` from Tensor `src` to this Tensor. /// Copies the last dimension at `index` from Tensor `src` to this Tensor.
// @param src Tensor /// \param[in] src Tensor
// @param index vector to the start of the dimension. The last dim should be 0 /// \param[in] index vector to the start of the dimension. The last dim should be 0
// @return Status /// \return Status
Status CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index); Status CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index);
protected: protected:
// Get the starting memory address for the data of the tensor. This potentially /// Allocate memory for the tensor using the data_allocator
// drives an allocation if the data is null. /// \param[in] length number of bytes to be allocated
// @return unsigned char* /// \return Error Status
unsigned char *GetMutableBuffer(); Status AllocateBuffer(const dsize_t &length);
// A function that prints Tensor recursively, first called by print /// Get the starting memory address for the data of the tensor. This potentially
// @param out /// drives an allocation if the data is null.
// @param cur_dim /// \return unsigned char*
// @param cur_index unsigned char *GetMutableBuffer() { return data_; }
/// A function that prints Tensor recursively, first called by print
/// \param[in] out
/// \param[in] cur_dim
/// \param[in] cur_index
void PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const; void PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const;
// A function that prints info about the tensor /// A function that prints info about the tensor
// @param out output stream /// \param[out] out output stream
void Print(std::ostream &out) const; void Print(std::ostream &out) const;
// A function that print the value as specified by its index /// A function that print the value as specified by its index
// @param index vector representing the index /// \param[in] index vector representing the index
// @param out /// \param[out] out
void PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const; void PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const;
// Get pointer to item located at `index`, caller needs to provide the type. /// Get pointer to item located at `index`, caller needs to provide the type.
// @tparam T /// \tparam T
// @param index vector<dsize_t> /// \param[in] index vector<dsize_t>
// @return return a pointer to the item specified at index of type `T` /// \return return a pointer to the item specified at index of type `T`
template <typename T> template <typename T>
Status GetItemPtr(T **, const std::vector<dsize_t> &index) const; Status GetItemPtr(T **, const std::vector<dsize_t> &index) const;
// Get pointer to string located at `index` and the length of string /// Get pointer to string located at `index` and the length of string
// @param index vector<dsize_t> /// \param[in] index vector<dsize_t>
// @return return a pointer to the string specified at index and the length of the string /// \return return a pointer to the string specified at index and the length of the string
Status GetItemPtr(uchar **, const std::vector<dsize_t> &index, offset_t *length = nullptr) const; Status GetItemPtr(uchar **, const std::vector<dsize_t> &index, offset_t *length = nullptr) const;
// Given a flat index of an item string, return the start and length of the item /// Given a flat index of an item string, return the start and length of the item
// @param index flat index of the item /// \param[in] index flat index of the item
// @return start address of the string /// \param[out] start address of the string
// @return length of the string /// \param[out] length of the string
Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const; Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const;
// all access to shape_ should be via shape /// Skip the offsets and return the start of the buffer where the real strings are stored. Caller needs to check
/// that the tensor's type is a string, otherwise an undefined address would be returned.
/// \return address of the first string of the tensor.
uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; }
/// all access to shape_ should be via shape
TensorShape shape_; TensorShape shape_;
// data type of tensor /// data type of tensor
DataType type_; DataType type_;
// pointer to the start of the physical data /// pointer to the start of the physical data
unsigned char *data_; unsigned char *data_;
// An allocator for data_ /// An allocator for data_
CharAllocPtr data_allocator_; CharAllocPtr data_allocator_;
// pointer to the end of the physical data /// pointer to the end of the physical data
unsigned char *data_end_ = nullptr; unsigned char *data_end_ = nullptr;
private:
/// Helper function to create a tensor from Numpy array of strings
/// \param[in] arr Numpy array
/// \param[out] out Created Tensor
/// \return Status
static Status CreateFromNpString(py::array arr, TensorPtr *out);
/// Copy raw data of a array based on shape and strides to the destination pointer
/// \param dst [out] Pointer to the destination array where the content is to be copied
/// \param[in] src Pointer to the source of strided array to be copied
/// \param[in] shape shape of the source array
/// \param[in] strides strides of the source array
/// \param[in] type_size number of bytes needed to store one array element's type
/// \return Status Code
static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
std::vector<dsize_t> strides, uint8_t type_size);
/// const of the size of the offset variable
static constexpr uint8_t kOffsetSize = sizeof(offset_t);
}; };
template <> template <>
inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() { inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() {
return TensorIterator<std::string_view>(data_, shape_.NumOfElements()); return TensorIterator<std::string_view>(data_, shape_.NumOfElements());
} }
/// Create a Tensor from a given list of strings.
/// @note: The memory layout of a Tensor of strings consists of the Offset_array followed by the strings.
/// The offset array will store one extra value to find the length of the last string.
/// OFFSET_1, OFFSET_2, ..., OFFSET_n+1, STRING_1, STRING_2, ..., STRING_n
/// The value of each offset is the start index of the corresponding string
/// Offsets are of type offset_t
/// strings will be null-terminated
/// example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
/// with kOffsetSize = 4 and two elements, the offset array occupies bytes 0-11:
/// |----------------------------------------------------------------|
/// | OFFSET ARRAY | STRINGS |
/// | bytes 0-3 | bytes 4-7 | bytes 8-11 | bytes 12-15 | bytes 16-18 |
/// | 12 | 16 | 19 | abc\0 | de\0 |
/// |----------------------------------------------------------------|
/// \param[in] items elements of the tensor
/// \param[in] shape shape of the output tensor
/// \param[out] out output argument to hold the created Tensor
/// \return Status Code
template <>
inline Status Tensor::CreateFromVector<std::string>(const std::vector<std::string> &items, const TensorShape &shape,
TensorPtr *out) {
CHECK_FAIL_RETURN_UNEXPECTED(
items.size() == shape.NumOfElements(),
"Number of elements in the vector does not match the number of elements of the shape required");
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(items.size())}),
DataType(DataType::DE_STRING));
if (items.size() == 0) {
if (shape.known()) {
return (*out)->Reshape(shape);
}
}
auto length_sum = [](dsize_t sum, const std::string &s) { return s.length() + sum; };
dsize_t total_length = std::accumulate(items.begin(), items.end(), 0, length_sum);
// total bytes needed = offset array + strings
// offset array needs to store one offset var per element + 1 extra to get the length of the last string.
// strings will be null-terminated --> need 1 extra byte per element
dsize_t num_bytes = (kOffsetSize + 1) * (*out)->shape_.NumOfElements() + kOffsetSize + total_length;
RETURN_IF_NOT_OK((*out)->AllocateBuffer(num_bytes));
auto offset_arr = reinterpret_cast<offset_t *>((*out)->data_);
uchar *buf = (*out)->GetStringsBuffer();
offset_t offset = buf - (*out)->data_; // the first string will start here
uint32_t i = 0;
for (const auto &str : items) {
// insert the start index of the string.
offset_arr[i++] = offset;
// total bytes are reduced by kOffsetSize
num_bytes -= kOffsetSize;
// insert actual string
int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
if (ret_code != 0) MS_LOG(ERROR) << "Cannot copy string into Tensor";
// next string will be stored right after the current one.
offset = offset + str.length() + 1;
// total bytes are reduced by the length of the string
num_bytes -= str.length() + 1;
}
// store one more offset value so we can get the length of the last string
// length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
offset_arr[i] = offset;
(*out)->data_end_ = (*out)->data_ + offset_arr[i];
MS_ASSERT(num_bytes == 0);
if (shape.known()) {
RETURN_IF_NOT_OK((*out)->Reshape(shape));
}
return Status::OK();
}
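/// A sketch (not from this diff) exercising the string layout above end to end;
/// `StringTensorRoundTrip` is a hypothetical helper, not MindSpore API.
inline Status StringTensorRoundTrip() {
  TensorPtr t;
  RETURN_IF_NOT_OK(Tensor::CreateFromVector<std::string>({"abc", "de"}, TensorShape({2}), &t));
  std::string_view first;
  RETURN_IF_NOT_OK(t->GetItemAt(&first, {0}));  // resolved through the offset array
  MS_ASSERT(first == "abc");
  return Status::OK();
}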
/// Create a string scalar Tensor from the given value.
/// \param[in] item value
/// \param[out] out Created tensor
/// \return Status code
template <>
inline Status Tensor::CreateScalar<std::string>(const std::string &item, TensorPtr *out) {
return CreateFromVector<std::string>({item}, TensorShape::CreateScalar(), out);
}
} // namespace dataset } // namespace dataset
} // namespace mindspore } // namespace mindspore
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_ #endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_
@ -97,7 +97,7 @@ Status OneHotEncoding(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *ou
if (input->Rank() == 1) num_elements = input->shape()[0]; if (input->Rank() == 1) num_elements = input->shape()[0];
TensorShape out_shape({num_elements, num_classes}); TensorShape out_shape({num_elements, num_classes});
std::shared_ptr<Tensor> out; std::shared_ptr<Tensor> out;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, TensorImpl::kFlexible, out_shape, input->type())); RETURN_IF_NOT_OK(Tensor::CreateEmpty(out_shape, input->type(), &out));
RETURN_IF_NOT_OK(out->Zero()); RETURN_IF_NOT_OK(out->Zero());
for (dsize_t i = 0; i < num_elements; ++i) { for (dsize_t i = 0; i < num_elements; ++i) {
if (input->type().IsUnsignedInt()) { if (input->type().IsUnsignedInt()) {
@ -133,7 +133,9 @@ Status Fill(const std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output
fill_output = fill_value; fill_output = fill_value;
} }
RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, TensorImpl::kFlexible, input_shape, input_type)); if (input_type.IsNumeric()) {
RETURN_IF_NOT_OK(Tensor::CreateEmpty(input_shape, input_type, &out));
}
switch (input_type.value()) { switch (input_type.value()) {
case DataType::DE_BOOL: { case DataType::DE_BOOL: {
@ -216,7 +218,7 @@ Status Fill(const std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output
for (int i = 0; i < input_shape.NumOfElements(); i++) { for (int i = 0; i < input_shape.NumOfElements(); i++) {
strings.emplace_back(fill_string); strings.emplace_back(fill_string);
} }
RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, strings, input_shape)); RETURN_IF_NOT_OK(Tensor::CreateFromVector(strings, input_shape, &out));
break; break;
} }
case DataType::DE_UNKNOWN: { case DataType::DE_UNKNOWN: {
@ -285,9 +287,8 @@ void CastFrom(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
// Type cast operator // Type cast operator
Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type) { Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type) {
RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), data_type)); RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), data_type, output));
RETURN_IF_NOT_OK((*output)->AllocateBuffer((*output)->SizeInBytes()));
switch (input->type().value()) { switch (input->type().value()) {
case DataType::DE_BOOL: case DataType::DE_BOOL:
CastFrom<bool>(input, output); CastFrom<bool>(input, output);
@ -335,8 +336,7 @@ Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o
Status ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) { Status ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
// initiate new tensor for type cast // initiate new tensor for type cast
DataType new_type = DataType("float16"); DataType new_type = DataType("float16");
RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), new_type)); RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), new_type, output));
RETURN_IF_NOT_OK((*output)->AllocateBuffer((*output)->SizeInBytes()));
auto in_itr = input->begin<float>(); auto in_itr = input->begin<float>();
auto out_itr = (*output)->begin<float16>(); auto out_itr = (*output)->begin<float16>();
@ -387,7 +387,7 @@ Status PadEndNumeric(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor>
(*dst) = src; // if no padding, copy the pointer (*dst) = src; // if no padding, copy the pointer
} else { } else {
CHECK_FAIL_RETURN_UNEXPECTED(src->Rank() == pad_shape.size(), "Pad to diff rank not allowed"); CHECK_FAIL_RETURN_UNEXPECTED(src->Rank() == pad_shape.size(), "Pad to diff rank not allowed");
RETURN_IF_NOT_OK(Tensor::CreateTensor(dst, TensorImpl::kFlexible, TensorShape(pad_shape), src->type())); RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape(pad_shape), src->type(), dst));
auto tensor_type = src->type().value(); auto tensor_type = src->type().value();
if (pad_val == 0) { // if pad with zero, don't care what type it is if (pad_val == 0) { // if pad with zero, don't care what type it is
RETURN_IF_NOT_OK((*dst)->Zero()); RETURN_IF_NOT_OK((*dst)->Zero());
@ -447,7 +447,7 @@ Status PadEndString(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor>
std::vector<dsize_t> cur_ind(src->Rank(), 0); std::vector<dsize_t> cur_ind(src->Rank(), 0);
std::vector<std::string> strings; std::vector<std::string> strings;
RETURN_IF_NOT_OK(PadEndStringHelper(src, &strings, TensorShape(pad_shape), cur_ind, 0, pad_val)); RETURN_IF_NOT_OK(PadEndStringHelper(src, &strings, TensorShape(pad_shape), cur_ind, 0, pad_val));
RETURN_IF_NOT_OK(Tensor::CreateTensor(dst, strings, TensorShape(pad_shape))); RETURN_IF_NOT_OK(Tensor::CreateFromVector(strings, TensorShape(pad_shape), dst));
} }
return Status::OK(); return Status::OK();
} }
@ -521,7 +521,7 @@ Status Mask(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu
"Cannot convert constant value to the type of the input tensor."); "Cannot convert constant value to the type of the input tensor.");
CHECK_FAIL_RETURN_UNEXPECTED(value->shape() == TensorShape::CreateScalar(), "Value is not a scalar"); CHECK_FAIL_RETURN_UNEXPECTED(value->shape() == TensorShape::CreateScalar(), "Value is not a scalar");
RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), DataType(DataType::DE_BOOL))); RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), DataType(DataType::DE_BOOL), output));
std::unique_ptr<TypeCastOp> value_cast_op(new TypeCastOp(input->type())); std::unique_ptr<TypeCastOp> value_cast_op(new TypeCastOp(input->type()));
std::shared_ptr<Tensor> casted_value; std::shared_ptr<Tensor> casted_value;
@ -629,7 +629,7 @@ Status ConcatenateHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
std::shared_ptr<Tensor> out; std::shared_ptr<Tensor> out;
if (input->type().IsNumeric()) { if (input->type().IsNumeric()) {
RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, TensorImpl::kFlexible, t, input->type())); RETURN_IF_NOT_OK(Tensor::CreateEmpty(t, input->type(), &out));
RETURN_IF_NOT_OK(out->Concatenate({0}, input)); RETURN_IF_NOT_OK(out->Concatenate({0}, input));
RETURN_IF_NOT_OK(out->Concatenate({input->shape()[0]}, append)); RETURN_IF_NOT_OK(out->Concatenate({input->shape()[0]}, append));
@ -645,7 +645,7 @@ Status ConcatenateHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
for (; itr != append->end<std::string_view>(); itr++) { for (; itr != append->end<std::string_view>(); itr++) {
strings.emplace_back(*itr); strings.emplace_back(*itr);
} }
RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, strings, t)); RETURN_IF_NOT_OK(Tensor::CreateFromVector(strings, t, &out));
*output = out; *output = out;
} }
@ -26,7 +26,7 @@ Status DuplicateOp::Compute(const TensorRow &input, TensorRow *output) {
IO_CHECK_VECTOR(input, output); IO_CHECK_VECTOR(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor"); CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor");
std::shared_ptr<Tensor> out; std::shared_ptr<Tensor> out;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, input[0])); RETURN_IF_NOT_OK(Tensor::CreateFromTensor(input[0], &out));
output->push_back(input[0]); output->push_back(input[0]);
output->push_back(out); output->push_back(out);
return Status::OK(); return Status::OK();
@ -63,9 +63,8 @@ int GetCVBorderType(BorderType type) {
Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int flip_code) { Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int flip_code) {
std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input)); std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type()); std::shared_ptr<CVTensor> output_cv;
RETURN_UNEXPECTED_IF_NULL(output_cv); RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
RETURN_IF_NOT_OK(output_cv->AllocateBuffer(output_cv->SizeInBytes()));
if (input_cv->mat().data) { if (input_cv->mat().data) {
try { try {
@ -110,8 +109,9 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
TensorShape shape{output_height, output_width}; TensorShape shape{output_height, output_width};
int num_channels = input_cv->shape()[2]; int num_channels = input_cv->shape()[2];
if (input_cv->Rank() == 3) shape = shape.AppendDim(num_channels); if (input_cv->Rank() == 3) shape = shape.AppendDim(num_channels);
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(shape, input_cv->type()); std::shared_ptr<CVTensor> output_cv;
RETURN_UNEXPECTED_IF_NULL(output_cv); RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &output_cv));
auto cv_mode = GetCVInterpolationMode(mode); auto cv_mode = GetCVInterpolationMode(mode);
cv::resize(in_image, output_cv->mat(), cv::Size(output_width, output_height), fx, fy, cv_mode); cv::resize(in_image, output_cv->mat(), cv::Size(output_width, output_height), fx, fy, cv_mode);
*output = std::static_pointer_cast<Tensor>(output_cv); *output = std::static_pointer_cast<Tensor>(output_cv);
@ -147,8 +147,8 @@ Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o
RETURN_STATUS_UNEXPECTED(err); RETURN_STATUS_UNEXPECTED(err);
} }
cv::cvtColor(img_mat, img_mat, static_cast<int>(cv::COLOR_BGR2RGB)); cv::cvtColor(img_mat, img_mat, static_cast<int>(cv::COLOR_BGR2RGB));
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(img_mat); std::shared_ptr<CVTensor> output_cv;
RETURN_UNEXPECTED_IF_NULL(output_cv); RETURN_IF_NOT_OK(CVTensor::CreateFromMat(img_mat, &output_cv));
*output = std::static_pointer_cast<Tensor>(output_cv); *output = std::static_pointer_cast<Tensor>(output_cv);
return Status::OK(); return Status::OK();
} catch (const cv::Exception &e) { } catch (const cv::Exception &e) {
@ -309,7 +309,8 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
// three number of output components, always convert to RGB and output // three number of output components, always convert to RGB and output
constexpr int kOutNumComponents = 3; constexpr int kOutNumComponents = 3;
TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents}); TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents});
auto output_tensor = std::make_shared<Tensor>(ts, DataType(DataType::DE_UINT8)); std::shared_ptr<Tensor> output_tensor;
RETURN_IF_NOT_OK(Tensor::CreateEmpty(ts, DataType(DataType::DE_UINT8), &output_tensor));
const int buffer_size = output_tensor->SizeInBytes(); const int buffer_size = output_tensor->SizeInBytes();
JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>())); JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>()));
const int max_scanlines_to_read = skipped_scanlines + crop_h; const int max_scanlines_to_read = skipped_scanlines + crop_h;
@ -331,8 +332,8 @@ Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *ou
RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor"); RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor");
} }
cv::Mat input_image = input_cv->mat(); cv::Mat input_image = input_cv->mat();
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(input_cv->shape(), DataType(DataType::DE_FLOAT32)); std::shared_ptr<CVTensor> output_cv;
RETURN_UNEXPECTED_IF_NULL(output_cv); RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), DataType(DataType::DE_FLOAT32), &output_cv));
try { try {
input_image.convertTo(output_cv->mat(), CV_32F, rescale, shift); input_image.convertTo(output_cv->mat(), CV_32F, rescale, shift);
*output = std::static_pointer_cast<Tensor>(output_cv); *output = std::static_pointer_cast<Tensor>(output_cv);
@ -354,8 +355,8 @@ Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu
TensorShape shape{h, w}; TensorShape shape{h, w};
int num_channels = input_cv->shape()[2]; int num_channels = input_cv->shape()[2];
if (input_cv->Rank() == 3) shape = shape.AppendDim(num_channels); if (input_cv->Rank() == 3) shape = shape.AppendDim(num_channels);
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(shape, input_cv->type()); std::shared_ptr<CVTensor> output_cv;
RETURN_UNEXPECTED_IF_NULL(output_cv); RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &output_cv));
cv::Rect roi(x, y, w, h); cv::Rect roi(x, y, w, h);
(input_cv->mat())(roi).copyTo(output_cv->mat()); (input_cv->mat())(roi).copyTo(output_cv->mat());
*output = std::static_pointer_cast<Tensor>(output_cv); *output = std::static_pointer_cast<Tensor>(output_cv);
@ -386,10 +387,11 @@ Status HwcToChw(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output)
int height = input_cv->shape()[0]; int height = input_cv->shape()[0];
int width = input_cv->shape()[1]; int width = input_cv->shape()[1];
auto output_cv = std::make_unique<CVTensor>(TensorShape{num_channels, height, width}, input_cv->type()); std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(TensorShape{num_channels, height, width}, input_cv->type(), &output_cv));
for (int i = 0; i < num_channels; ++i) { for (int i = 0; i < num_channels; ++i) {
cv::Mat mat; cv::Mat mat;
RETURN_IF_NOT_OK(output_cv->Mat({i}, &mat)); RETURN_IF_NOT_OK(output_cv->MatAtIndex({i}, &mat));
cv::extractChannel(input_cv->mat(), mat, i); cv::extractChannel(input_cv->mat(), mat, i);
} }
   *output = std::move(output_cv);
@@ -406,8 +408,9 @@ Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *ou
   if (input_cv->shape().Size() != 3 || num_channels != 3) {
     RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3");
   }
-  auto output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
-  RETURN_UNEXPECTED_IF_NULL(output_cv);
+  std::shared_ptr<CVTensor> output_cv;
+  RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
   cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_BGR2RGB));
   *output = std::static_pointer_cast<Tensor>(output_cv);
   return Status::OK();
@@ -440,8 +443,8 @@ Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tenso
   TensorShape shape{target_height, target_width};
   int num_channels = input_cv->shape()[2];
   if (input_cv->Rank() == 3) shape = shape.AppendDim(num_channels);
-  std::shared_ptr<CVTensor> cvt_out = std::make_shared<CVTensor>(shape, input_cv->type());
-  RETURN_UNEXPECTED_IF_NULL(cvt_out);
+  std::shared_ptr<CVTensor> cvt_out;
+  RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &cvt_out));
   cv::resize(cv_in(roi), cvt_out->mat(), cv::Size(target_width, target_height), 0, 0, cv_mode);
   *output = std::static_pointer_cast<Tensor>(cvt_out);
   return Status::OK();
@@ -475,8 +478,7 @@ Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
   if (!expand) {
     // this case means that the shape doesn't change, size stays the same
     // We may not need this memcpy if it is in place.
-    output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
-    RETURN_UNEXPECTED_IF_NULL(output_cv);
+    RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
     // using inter_nearest to comply with python default
     cv::warpAffine(input_img, output_cv->mat(), rot, input_img.size(), GetCVInterpolationMode(interpolation),
                    cv::BORDER_CONSTANT, fill_color);
@@ -489,7 +491,7 @@ Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
     // use memcpy and don't compute the new shape since openCV has a rounding problem
     cv::warpAffine(input_img, output_img, rot, bbox.size(), GetCVInterpolationMode(interpolation),
                    cv::BORDER_CONSTANT, fill_color);
-    output_cv = std::make_shared<CVTensor>(output_img);
+    RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, &output_cv));
     RETURN_UNEXPECTED_IF_NULL(output_cv);
   }
   *output = std::static_pointer_cast<Tensor>(output_cv);
@@ -506,8 +508,8 @@ Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *
     RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor");
   }
   cv::Mat in_image = input_cv->mat();
-  std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(input_cv->shape(), DataType(DataType::DE_FLOAT32));
-  RETURN_UNEXPECTED_IF_NULL(output_cv);
+  std::shared_ptr<CVTensor> output_cv;
+  RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), DataType(DataType::DE_FLOAT32), &output_cv));
   mean->Squeeze();
   if (mean->type() != DataType::DE_FLOAT32 || mean->Rank() != 1 || mean->shape()[0] != 3) {
     std::string err_msg = "Mean tensor should be of size 3 and type float.";
@@ -548,8 +550,8 @@ Status AdjustBrightness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te
   if (input_cv->Rank() != 3 || num_channels != 3) {
     RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3");
   }
-  auto output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
-  RETURN_UNEXPECTED_IF_NULL(output_cv);
+  std::shared_ptr<CVTensor> output_cv;
+  RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
   output_cv->mat() = input_img * alpha;
   *output = std::static_pointer_cast<Tensor>(output_cv);
 } catch (const cv::Exception &e) {
@@ -572,8 +574,8 @@ Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tens
   cv::Mat gray, output_img;
   cv::cvtColor(input_img, gray, CV_RGB2GRAY);
   int mean_img = static_cast<int>(cv::mean(gray).val[0] + 0.5);
-  std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
-  RETURN_UNEXPECTED_IF_NULL(output_cv);
+  std::shared_ptr<CVTensor> output_cv;
+  RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
   output_img = cv::Mat::zeros(input_img.rows, input_img.cols, CV_8UC1);
   output_img = output_img + mean_img;
   cv::cvtColor(output_img, output_img, CV_GRAY2RGB);
@@ -680,7 +682,9 @@ Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor
   cv::Mat result;
   cv::merge(image_result, result);
   result.convertTo(result, input_cv->mat().type());
-  std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(result);
+  std::shared_ptr<CVTensor> output_cv;
+  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, &output_cv));
   (*output) = std::static_pointer_cast<Tensor>(output_cv);
   (*output)->Reshape(input->shape());
 } catch (const cv::Exception &e) {
@@ -700,8 +704,8 @@ Status AdjustSaturation(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te
   if (input_cv->Rank() != 3 || num_channels != 3) {
     RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3");
   }
-  auto output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
-  RETURN_UNEXPECTED_IF_NULL(output_cv);
+  std::shared_ptr<CVTensor> output_cv;
+  RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
   cv::Mat output_img = output_cv->mat();
   cv::Mat gray;
   cv::cvtColor(input_img, gray, CV_RGB2GRAY);
@@ -729,8 +733,8 @@ Status AdjustHue(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *
   if (input_cv->Rank() != 3 || num_channels != 3) {
     RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3");
   }
-  auto output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
-  RETURN_UNEXPECTED_IF_NULL(output_cv);
+  std::shared_ptr<CVTensor> output_cv;
+  RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
   cv::Mat output_img;
   cv::cvtColor(input_img, output_img, CV_RGB2HSV_FULL);
   for (int y = 0; y < output_img.cols; y++) {
@@ -781,7 +785,8 @@ Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o
   }
   cv::Mat result;
   cv::merge(image_result, result);
-  std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(result);
+  std::shared_ptr<CVTensor> output_cv;
+  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, &output_cv));
   (*output) = std::static_pointer_cast<Tensor>(output_cv);
   (*output)->Reshape(input->shape());
 } catch (const cv::Exception &e) {
@@ -867,8 +872,8 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output
   } else {
     cv::copyMakeBorder(input_cv->mat(), out_image, pad_top, pad_bottom, pad_left, pad_right, b_type);
   }
-  std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(out_image);
-  RETURN_UNEXPECTED_IF_NULL(output_cv);
+  std::shared_ptr<CVTensor> output_cv;
+  RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_image, &output_cv));
   // pad the dimension if shape information is only 2 dimensional, this is grayscale
   int num_channels = input_cv->shape()[2];
   if (input_cv->Rank() == 3 && num_channels == 1 && output_cv->Rank() == 2) output_cv->ExpandDim(2);
@@ -932,7 +937,7 @@ Status UpdateBBoxesForCrop(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount,
     }
   }
   std::shared_ptr<Tensor> retV;
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(&retV, copyVals, TensorShape({static_cast<dsize_t>(*bboxCount), bboxDim})));
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(copyVals, TensorShape({static_cast<dsize_t>(*bboxCount), bboxDim}), &retV));
   (*bboxList) = retV;  // reset pointer
   return Status::OK();
 }
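For reference, a minimal sketch of the factory pattern these image_utils.cc hunks converge on: construction goes through CVTensor::CreateEmpty (or CVTensor::CreateFromMat for an existing cv::Mat), so an allocation failure surfaces as a bad Status instead of a null shared_ptr. The wrapper name MakeOutputLike below is illustrative only, not part of this change.

  #include "minddata/dataset/core/cv_tensor.h"

  // Allocate an output image with the same shape/type as the input and hand it
  // back as a generic Tensor; any failure propagates through the returned Status.
  Status MakeOutputLike(const std::shared_ptr<CVTensor> &input_cv, std::shared_ptr<Tensor> *output) {
    std::shared_ptr<CVTensor> output_cv;
    RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
    *output = std::static_pointer_cast<Tensor>(output_cv);
    return Status::OK();
  }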

View File

@@ -40,8 +40,8 @@ Status InvertOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
   if (num_channels != 3) {
     RETURN_STATUS_UNEXPECTED("The shape is incorrect: num of channels != 3");
   }
-  auto output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
+  std::shared_ptr<CVTensor> output_cv;
+  RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
   RETURN_UNEXPECTED_IF_NULL(output_cv);
   output_cv->mat() = cv::Scalar::all(255) - input_img;

View File

@@ -24,20 +24,14 @@
 namespace mindspore {
 namespace dataset {
 NormalizeOp::NormalizeOp(float mean_r, float mean_g, float mean_b, float std_r, float std_g, float std_b) {
-  int size[] = {3};
-  cv::Mat mean_cv(1, size, CV_32F);
-  mean_cv.at<float>(0) = mean_r;
-  mean_cv.at<float>(1) = mean_g;
-  mean_cv.at<float>(2) = mean_b;
-  mean_ = std::make_shared<CVTensor>(mean_cv);
-  mean_->Squeeze();
-
-  cv::Mat std_cv(1, size, CV_32F);
-  std_cv.at<float>(0) = std_r;
-  std_cv.at<float>(1) = std_g;
-  std_cv.at<float>(2) = std_b;
-  std_ = std::make_shared<CVTensor>(std_cv);
-  std_->Squeeze();
+  Status s = Tensor::CreateFromVector<float>({mean_r, mean_g, mean_b}, &mean_);
+  if (s.IsError()) {
+    MS_LOG(ERROR) << "Could not create mean tensor.";
+  }
+  s = Tensor::CreateFromVector<float>({std_r, std_g, std_b}, &std_);
+  if (s.IsError()) {
+    MS_LOG(ERROR) << "Could not create std tensor.";
+  }
 }

 Status NormalizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
@@ -47,9 +41,7 @@ Status NormalizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt
 }

 void NormalizeOp::Print(std::ostream &out) const {
-  out << "NormalizeOp, mean: " << mean_->mat().at<float>(0) << ", " << mean_->mat().at<float>(1) << ", "
-      << mean_->mat().at<float>(2) << "std: " << std_->mat().at<float>(0) << ", " << std_->mat().at<float>(1) << ", "
-      << std_->mat().at<float>(2) << std::endl;
+  out << "NormalizeOp, mean: " << mean_ << std::endl << "std: " << std_ << std::endl;
 }
 }  // namespace dataset
 }  // namespace mindspore

View File

@@ -39,8 +39,8 @@ class NormalizeOp : public TensorOp {
   std::string Name() const override { return kNormalizeOp; }

  private:
-  std::shared_ptr<CVTensor> mean_;
-  std::shared_ptr<CVTensor> std_;
+  std::shared_ptr<Tensor> mean_;
+  std::shared_ptr<Tensor> std_;
 };
 }  // namespace dataset
 }  // namespace mindspore

View File

@@ -49,7 +49,7 @@ Status PyFuncOp::Compute(const TensorRow &input, TensorRow *output) {
     if (py::isinstance<py::array>(ret_py_obj)) {
       // In case of a n-1 mapping, the return value will be a numpy array
       std::shared_ptr<Tensor> out;
-      RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, ret_py_obj.cast<py::array>()));
+      RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(ret_py_obj.cast<py::array>(), &out));
       output->push_back(out);
     } else if (py::isinstance<py::tuple>(ret_py_obj)) {
       // In case of a n-m mapping, the return value will be a tuple of numpy arrays
@@ -61,7 +61,7 @@ Status PyFuncOp::Compute(const TensorRow &input, TensorRow *output) {
           goto ShapeMisMatch;
         }
         std::shared_ptr<Tensor> out;
-        RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, ret_py_ele.cast<py::array>()));
+        RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(ret_py_ele.cast<py::array>(), &out));
         output->push_back(out);
       }
     } else {
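A hedged sketch of the numpy bridge used above, assuming the surrounding pybind11 context; dtype and shape are taken from the array itself rather than passed explicitly.

  py::array arr = ret_py_obj.cast<py::array>();
  std::shared_ptr<Tensor> out;
  // Builds a new Tensor from the numpy buffer; element type and shape come from arr.
  RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(arr, &out));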

View File

@@ -136,8 +136,7 @@ Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::shared_ptr<Tensor
   for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) {
     RETURN_IF_NOT_OK(CaseFoldWithoutUnusedWords(*iter, kUnusedWords, &strs[i++]));
   }
-  *output = std::make_shared<Tensor>(std::move(strs), input->shape());
-  return Status::OK();
+  return Tensor::CreateFromVector(strs, input->shape(), output);
 }

 Status BasicTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {

View File

@@ -39,8 +39,7 @@ Status CaseFoldOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr
     nfkc_case_fold->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error);
     CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "normalizeUTF8 failed.");
   }
-  *output = std::make_shared<Tensor>(std::move(strs), input->shape());
-  return Status::OK();
+  return Tensor::CreateFromVector(strs, input->shape(), output);
 }
 }  // namespace dataset
 }  // namespace mindspore

View File

@@ -33,12 +33,7 @@ Status SlidingWindowHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr
   // if the data row has fewer items than width, the corresponding result row will be empty
   if (out_shape.Size() == 0) {
     MS_LOG(WARNING) << "The data row has fewer items than width, the result will be empty.";
-    if (input->type().value() == DataType::DE_STRING) {
-      RETURN_IF_NOT_OK(Tensor::CreateTensor(output, std::vector<std::string>{}, TensorShape({0})));
-    } else {
-      RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, TensorShape({0}), input->type()));
-    }
-    return Status::OK();
+    return Tensor::CreateEmpty(TensorShape({0}), input->type(), output);
   }
   axis = Tensor::HandleNeg(axis, input->shape().Size());
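The string/numeric branch collapses because Tensor::CreateEmpty now accepts DE_STRING as well as numeric types, so one call covers both cases. A minimal sketch, with the element type chosen at runtime:

  std::shared_ptr<Tensor> empty;
  // One call regardless of whether input->type() is DE_STRING or a numeric DataType.
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), input->type(), &empty));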

View File

@@ -68,15 +68,12 @@ Status JiebaTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
       offsets_limit.push_back(static_cast<uint32_t>(item.offset + item.word.length()));
     }
   }
-  token_tensor = std::make_shared<Tensor>(words, TensorShape({(dsize_t)words.size()}));
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(words, &token_tensor));
   output->push_back(token_tensor);
   if (with_offsets_) {
-    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible,
-                                          TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32),
-                                          reinterpret_cast<unsigned char *>(&offsets_start[0])));
-    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible,
-                                          TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32),
-                                          reinterpret_cast<unsigned char *>(&offsets_limit[0])));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &offsets_start_tensor));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &offsets_limit_tensor));
     output->push_back(offsets_start_tensor);
     output->push_back(offsets_limit_tensor);
   }
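Note the two-argument CreateFromVector overload used for the offset tensors: with no explicit TensorShape it yields a 1-D tensor of shape {v.size()}, with the DataType deduced from the vector's element type, which is what makes the old TensorShape/DE_UINT32/reinterpret_cast boilerplate unnecessary. A sketch with illustrative values:

  std::vector<uint32_t> offsets = {0, 5, 9};
  std::shared_ptr<Tensor> t;
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets, &t));  // shape {3}, type DE_UINT32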

View File

@@ -36,9 +36,7 @@ Status LookupOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
       word_ids.back() != Vocab::kNoTokenExists,
       "Lookup Error: token: " + std::string(*itr) + " doesn't exist in vocab and no unknown token is specified.");
   }
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), type_,
-                                        reinterpret_cast<unsigned char *>(word_ids.data())));
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(word_ids, input->shape(), output));
   return Status::OK();
 }

 Status LookupOp::OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) {

View File

@@ -67,7 +67,7 @@ Status NgramOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te
       }
     }
   }
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(output, res, TensorShape({static_cast<dsize_t>(res.size())})));
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(res, TensorShape({static_cast<dsize_t>(res.size())}), output));
   return Status::OK();
 }

View File

@@ -68,8 +68,7 @@ Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::share
     normalize->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error);
     CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "normalizeUTF8 failed.");
   }
-  *output = std::make_shared<Tensor>(std::move(strs), input->shape());
-  return Status::OK();
+  return Tensor::CreateFromVector(strs, input->shape(), output);
 }
 }  // namespace dataset
 }  // namespace mindspore

View File

@@ -50,8 +50,7 @@ Status RegexReplaceOp::Compute(const std::shared_ptr<Tensor> &input, std::shared
   for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) {
     RETURN_IF_NOT_OK(RegexReplace(&matcher, *iter, &strs[i]));
   }
-  *output = std::make_shared<Tensor>(std::move(strs), input->shape());
-  return Status::OK();
+  return Tensor::CreateFromVector(strs, input->shape(), output);
 }
 }  // namespace dataset
 }  // namespace mindspore

View File

@@ -120,15 +120,11 @@ Status RegexTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
   std::shared_ptr<Tensor> token_tensor, offsets_start_tensor, offsets_limit_tensor;
   RETURN_IF_NOT_OK(input[0]->GetItemAt(&text, {}));
   RETURN_IF_NOT_OK(GetRegexTokens(std::string(text.data(), text.size()), &tokens, &offsets_start, &offsets_limit));
-  token_tensor = std::make_shared<Tensor>(std::move(tokens), TensorShape({(dsize_t)tokens.size()}));
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(std::move(tokens), &token_tensor));
   output->push_back(token_tensor);
   if (with_offsets_) {
-    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible,
-                                          TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32),
-                                          reinterpret_cast<unsigned char *>(&offsets_start[0])));
-    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible,
-                                          TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32),
-                                          reinterpret_cast<unsigned char *>(&offsets_limit[0])));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &offsets_start_tensor));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &offsets_limit_tensor));
     output->push_back(offsets_start_tensor);
     output->push_back(offsets_limit_tensor);
   }

View File

@@ -69,14 +69,14 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s
     if (!status.ok()) {
       RETURN_STATUS_UNEXPECTED("sentence piece tokenizer error");
     }
-    *output = std::make_unique<Tensor>(pieces, TensorShape({(dsize_t)pieces.size()}));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(pieces, output));
   } else {
     std::vector<int> ids;
     auto status = processor_.Encode(sentence, &ids);
     if (!status.ok()) {
       RETURN_STATUS_UNEXPECTED("sentence piece tokenizer error");
     }
-    RETURN_IF_NOT_OK(Tensor::CreateTensor(output, ids, TensorShape({(dsize_t)ids.size()})));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(ids, output));
   }
   return Status::OK();
 }

View File

@@ -114,7 +114,7 @@ Status ToNumberOp::ToSignedIntegral(const std::shared_ptr<Tensor> &input, std::s
     casted.push_back(casted_result);
   }
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(output, casted, input->shape()));
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(casted, input->shape(), output));
   return Status::OK();
 }
@@ -157,7 +157,7 @@ Status ToNumberOp::ToUnsignedIntegral(const std::shared_ptr<Tensor> &input, std:
     casted.push_back(casted_result);
   }
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(output, casted, input->shape()));
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(casted, input->shape(), output));
   return Status::OK();
 }
@@ -165,7 +165,7 @@ Status ToNumberOp::ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_p
   // special case, float16 does not exist in c++, no native support for
   // casting, so cast to float first then use this method, which use Eigen.
   std::shared_ptr<Tensor> temp;
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(&temp, TensorImpl::kFlexible, input->shape(), DataType("float32")));
+  RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), DataType("float32"), &temp));
   RETURN_IF_NOT_OK(ToFloat(input, &temp));
   RETURN_IF_NOT_OK(mindspore::dataset::ToFloat16(temp, output));
   return Status::OK();
@@ -200,7 +200,7 @@ Status ToNumberOp::ToFloat(const std::shared_ptr<Tensor> &input, std::shared_ptr
     casted.push_back(casted_result);
   }
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(output, casted, input->shape()));
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(casted, input->shape(), output));
   return Status::OK();
 }
@@ -233,7 +233,7 @@ Status ToNumberOp::ToDouble(const std::shared_ptr<Tensor> &input, std::shared_pt
     casted.push_back(casted_result);
   }
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(output, casted, input->shape()));
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(casted, input->shape(), output));
   return Status::OK();
 }

View File

@@ -55,15 +55,13 @@ Status UnicodeCharTokenizerOp::Compute(const TensorRow &input, TensorRow *output
     offsets_start.push_back(0);
     offsets_limit.push_back(0);
   }
-  token_tensor = std::make_shared<Tensor>(splits, TensorShape({(dsize_t)splits.size()}));
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(splits, &token_tensor));
   output->push_back(token_tensor);
   if (with_offsets_) {
-    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible,
-                                          TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32),
-                                          reinterpret_cast<unsigned char *>(&offsets_start[0])));
-    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible,
-                                          TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32),
-                                          reinterpret_cast<unsigned char *>(&offsets_limit[0])));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &offsets_start_tensor));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &offsets_limit_tensor));
     output->push_back(offsets_start_tensor);
     output->push_back(offsets_limit_tensor);
   }

View File

@@ -96,15 +96,12 @@ Status UnicodeScriptTokenizerOp::Compute(const TensorRow &input, TensorRow *outp
     offsets_start.push_back(0);
     offsets_limit.push_back(0);
   }
-  token_tensor = std::make_shared<Tensor>(splits, TensorShape({(dsize_t)splits.size()}));
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(splits, &token_tensor));
   output->push_back(token_tensor);
   if (with_offsets_) {
-    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible,
-                                          TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32),
-                                          reinterpret_cast<unsigned char *>(&offsets_start[0])));
-    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible,
-                                          TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32),
-                                          reinterpret_cast<unsigned char *>(&offsets_limit[0])));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &offsets_start_tensor));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &offsets_limit_tensor));
    output->push_back(offsets_start_tensor);
     output->push_back(offsets_limit_tensor);
   }

View File

@@ -79,15 +79,12 @@ Status WhitespaceTokenizerOp::Compute(const TensorRow &input, TensorRow *output)
     offsets_start.push_back(0);
     offsets_limit.push_back(0);
   }
-  token_tensor = std::make_shared<Tensor>(splits, TensorShape({(dsize_t)splits.size()}));
+  RETURN_IF_NOT_OK(Tensor::CreateFromVector(splits, &token_tensor));
   output->push_back(token_tensor);
   if (with_offsets_) {
-    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible,
-                                          TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32),
-                                          reinterpret_cast<unsigned char *>(&offsets_start[0])));
-    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible,
-                                          TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32),
-                                          reinterpret_cast<unsigned char *>(&offsets_limit[0])));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &offsets_start_tensor));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &offsets_limit_tensor));
     output->push_back(offsets_start_tensor);
     output->push_back(offsets_limit_tensor);
   }

View File

@@ -1,157 +1,154 @@
 /**
  * Copyright 2020 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 #include "minddata/dataset/text/kernels/wordpiece_tokenizer_op.h"
 #include <algorithm>
 #include <utility>

 namespace mindspore {
 namespace dataset {

 const char WordpieceTokenizerOp::kDefSuffixIndicator[] = "##";
 const int WordpieceTokenizerOp::kDefMaxBytesPerToken = 100;
 const char WordpieceTokenizerOp::kDefUnknownToken[] = "[UNK]";
 const bool WordpieceTokenizerOp::kDefWithOffsets = false;

 WordpieceTokenizerOp::WordpieceTokenizerOp(const std::shared_ptr<Vocab> &vocab, const std::string &suffix_indicator,
                                            const int &max_bytes_per_token, const std::string &unknown_token,
                                            const bool &with_offsets)
     : vocab_(vocab),
       suffix_indicator_(suffix_indicator),
       max_bytes_per_token_(max_bytes_per_token),
       unknown_token_(unknown_token),
       with_offsets_(with_offsets) {}

 Status WordpieceTokenizerOp::LookupWord(const std::string &input_token, const RuneStrArray &runes, const int start,
                                         bool *out_found, int *out_end) const {
   CHECK_FAIL_RETURN_UNEXPECTED(start >= 0 && start < input_token.size(), "Out of range");
   *out_found = false;
   for (int i = runes.size() - 1; i >= 0; i--) {
     *out_end = runes[i].offset + runes[i].len;
     int len = *out_end - start;
     std::string word = input_token.substr(start, len);
     if (start > 0) {
       word = suffix_indicator_ + word;
     }
     if (vocab_->Lookup(word) != Vocab::kNoTokenExists) {
       *out_found = true;
       break;
     }
   }
   return Status::OK();
 }

 Status WordpieceTokenizerOp::FoundNoToken(const std::string &input_token, const uint32_t &basic_start,
                                           std::vector<std::string> *out_tokens, std::vector<uint32_t> *offsets_start,
                                           std::vector<uint32_t> *offsets_limit) const {
   out_tokens->clear();
   offsets_start->push_back(basic_start);
   if (unknown_token_.empty()) {
     out_tokens->emplace_back(input_token);
     offsets_limit->push_back(basic_start + input_token.length());
   } else {
     out_tokens->emplace_back(unknown_token_);
     offsets_limit->push_back(basic_start + input_token.length());
   }
   return Status::OK();
 }

 Status WordpieceTokenizerOp::AddSubword(const std::string &input_token, const int &start, const int &end,
                                         std::vector<std::string> *out_tokens) const {
   CHECK_FAIL_RETURN_UNEXPECTED(start >= 0 && end > start && end <= input_token.size(), "Out of range");
   std::string subword = input_token.substr(start, end - start);
   if (start > 0) {
     subword = suffix_indicator_ + subword;
   }
   out_tokens->emplace_back(subword);
   return Status::OK();
 }

 Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, const uint32_t &basic_start,
                                        std::vector<std::string> *out_tokens, std::vector<uint32_t> *offsets_start,
                                        std::vector<uint32_t> *offsets_limit) const {
   if (input_token.size() > max_bytes_per_token_) {
     offsets_start->push_back(basic_start);
     if (!unknown_token_.empty()) {
       offsets_limit->push_back(basic_start + unknown_token_.size());
       out_tokens->emplace_back(unknown_token_);
     } else {
       out_tokens->emplace_back(input_token);
       offsets_limit->push_back(basic_start + input_token.size());
     }
     return Status::OK();
   }
   RuneStrArray runes;
   if (!DecodeRunesInString(input_token.data(), input_token.size(), runes)) {
     RETURN_STATUS_UNEXPECTED("Decode utf8 string failed.");
   }
   int end = 0;
   for (int start = 0; start < input_token.size();) {
     bool found = false;
     RETURN_IF_NOT_OK(LookupWord(input_token, runes, start, &found, &end));
     if (found) {
       RETURN_IF_NOT_OK(AddSubword(input_token, start, end, out_tokens));
       offsets_start->push_back(static_cast<uint32_t>(basic_start + start));
       offsets_limit->push_back(static_cast<uint32_t>(basic_start + end));
       start = end;
     } else {
       return FoundNoToken(input_token, basic_start, out_tokens, offsets_start, offsets_limit);
     }
   }
   return Status::OK();
 }

 Status WordpieceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
   IO_CHECK_VECTOR(input, output);
   if (input[0]->Rank() > 1 || input[0]->type() != DataType::DE_STRING) {
     RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor");
   }
   dsize_t count = 0;
   std::vector<std::string> out_tokens;
   std::vector<uint32_t> offsets_start, offsets_limit;
   std::shared_ptr<Tensor> token_tensor, offsets_start_tensor, offsets_limit_tensor;
   for (auto iter = input[0]->begin<std::string_view>(); iter != input[0]->end<std::string_view>(); iter++) {
     uint32_t basic_start = 0;
     std::vector<std::string> temp_tokens;
     if (with_offsets_ && input.size() == 3) {
       RETURN_IF_NOT_OK(input[1]->GetItemAt<uint32_t>(&basic_start, {count, 0}));
     }
     RETURN_IF_NOT_OK(GetTokens(std::string(*iter), basic_start, &temp_tokens, &offsets_start, &offsets_limit));
     out_tokens.insert(out_tokens.end(), temp_tokens.begin(), temp_tokens.end());
     count++;
   }
   if (out_tokens.empty()) {
     out_tokens.emplace_back("");
     offsets_start.push_back(0);
     offsets_limit.push_back(0);
   }
-  token_tensor = std::make_shared<Tensor>(out_tokens, TensorShape({(dsize_t)out_tokens.size()}));
+  Tensor::CreateFromVector(out_tokens, &token_tensor);
   output->push_back(token_tensor);
   if (with_offsets_) {
-    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible,
-                                          TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32),
-                                          reinterpret_cast<unsigned char *>(&offsets_start[0])));
-    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible,
-                                          TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32),
-                                          reinterpret_cast<unsigned char *>(&offsets_limit[0])));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &offsets_start_tensor));
+    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &offsets_limit_tensor));
     output->push_back(offsets_start_tensor);
     output->push_back(offsets_limit_tensor);
   }
   return Status::OK();
 }

 }  // namespace dataset
 }  // namespace mindspore

View File

@@ -90,8 +90,8 @@ TEST_F(MindDataTestBatchOp, TestSimpleBatch) {
   rc = di.GetNextAsMap(&tensor_map);
   EXPECT_TRUE(rc.IsOk());
   std::shared_ptr<de::Tensor> t;
-  rc = de::Tensor::CreateTensor(&t, TensorImpl::kFlexible, de::TensorShape({12, 1}), de::DataType(DataType::DE_INT64),
-                                (unsigned char *)payload);
+  rc = de::Tensor::CreateFromMemory(de::TensorShape({12, 1}), de::DataType(DataType::DE_INT64),
+                                    (unsigned char *)payload, &t);
   EXPECT_TRUE(rc.IsOk());
   // verify the actual data in Tensor is correct
   EXPECT_EQ(*t == *tensor_map["col_sint64"], true);
@@ -119,14 +119,14 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropTrue) {
                        -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
   de::DatasetIterator di(tree);
   std::shared_ptr<de::Tensor> t1, t2, t3;
-  rc = de::Tensor::CreateTensor(&t1, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
-                                (unsigned char *)payload);
+  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                    (unsigned char *)payload, &t1);
   EXPECT_TRUE(rc.IsOk());
-  rc = de::Tensor::CreateTensor(&t2, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
-                                (unsigned char *)(payload + 7));
+  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                    (unsigned char *)(payload + 7), &t2);
   EXPECT_TRUE(rc.IsOk());
-  rc = de::Tensor::CreateTensor(&t3, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
-                                (unsigned char *)(payload + 2));
+  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                    (unsigned char *)(payload + 2), &t3);
   EXPECT_TRUE(rc.IsOk());

   TensorMap tensor_map;
@@ -164,17 +164,17 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropFalse) {
                        -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
   de::DatasetIterator di(tree);
   std::shared_ptr<de::Tensor> t1, t2, t3, t4;
-  rc = de::Tensor::CreateTensor(&t1, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
-                                (unsigned char *)payload);
+  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                    (unsigned char *)payload, &t1);
   EXPECT_TRUE(rc.IsOk());
-  rc = de::Tensor::CreateTensor(&t2, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
-                                (unsigned char *)(payload + 7));
+  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                    (unsigned char *)(payload + 7), &t2);
   EXPECT_TRUE(rc.IsOk());
-  rc = de::Tensor::CreateTensor(&t3, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
-                                (unsigned char *)(payload + 2));
+  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                    (unsigned char *)(payload + 2), &t3);
   EXPECT_TRUE(rc.IsOk());
-  rc = de::Tensor::CreateTensor(&t4, TensorImpl::kFlexible, de::TensorShape({3, 1}), de::DataType(DataType::DE_INT64),
-                                (unsigned char *)(payload + 9));
+  rc = de::Tensor::CreateFromMemory(de::TensorShape({3, 1}), de::DataType(DataType::DE_INT64),
+                                    (unsigned char *)(payload + 9), &t4);
   EXPECT_TRUE(rc.IsOk());

   TensorMap tensor_map;
@@ -216,11 +216,11 @@ TEST_F(MindDataTestBatchOp, TestBatchDropFalseRepeat) {
                        -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
   de::DatasetIterator di(tree);
   std::shared_ptr<de::Tensor> t1, t2;
-  rc = de::Tensor::CreateTensor(&t1, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
-                                (unsigned char *)payload);
+  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
+                                    (unsigned char *)payload, &t1);
   EXPECT_TRUE(rc.IsOk());
-  rc = de::Tensor::CreateTensor(&t2, TensorImpl::kFlexible, de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
-                                (unsigned char *)(payload + 7));
+  rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
+                                    (unsigned char *)(payload + 7), &t2);
   EXPECT_TRUE(rc.IsOk());

   TensorMap tensor_map;
@@ -262,11 +262,11 @@ TEST_F(MindDataTestBatchOp, TestBatchDropTrueRepeat) {
                        -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
   de::DatasetIterator di(tree);
   std::shared_ptr<de::Tensor> t1, t2;
-  rc = de::Tensor::CreateTensor(&t1, TensorImpl::kFlexible, de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
-                                (unsigned char *)payload);
+  rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
+                                    (unsigned char *)payload, &t1);
   EXPECT_TRUE(rc.IsOk());
-  rc = de::Tensor::CreateTensor(&t2, TensorImpl::kFlexible, de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
-                                (unsigned char *)(payload + 5));
+  rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
+                                    (unsigned char *)(payload + 5), &t2);
   EXPECT_TRUE(rc.IsOk());

   TensorMap tensor_map;
@@ -300,7 +300,7 @@ TEST_F(MindDataTestBatchOp, TestSimpleBatchPadding) {
   std::shared_ptr<BatchOp> op;
   PadInfo m;
   std::shared_ptr<Tensor> pad_value;
-  Tensor::CreateTensor(&pad_value, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32));
+  Tensor::CreateEmpty(TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32), &pad_value);
   pad_value->SetItemAt<float>({}, -1);
   m.insert({"col_1d", std::make_pair(TensorShape({4}), pad_value)});
   de::BatchOp::Builder(12).SetDrop(false).SetPaddingMap(m, true).Build(&op);
@@ -359,8 +359,8 @@ TEST_F(MindDataTestBatchOp, TestSimpleBatchPadding) {
                        -1,
                        -1};
   std::shared_ptr<de::Tensor> t;
-  rc = de::Tensor::CreateTensor(&t, TensorImpl::kFlexible, de::TensorShape({12, 4}), de::DataType(DataType::DE_INT64),
-                                (unsigned char *)payload);
+  rc = de::Tensor::CreateFromMemory(de::TensorShape({12, 4}), de::DataType(DataType::DE_INT64),
+                                    (unsigned char *)payload, &t);
   de::DatasetIterator di(tree);
   TensorMap tensor_map;
   rc = di.GetNextAsMap(&tensor_map);
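A minimal sketch of the CreateFromMemory signature these tests migrate to; as with the other factories, the output parameter moves to the last position. The payload values below are illustrative.

  int64_t payload[] = {1, 2, 3, 4};
  std::shared_ptr<de::Tensor> t;
  // Builds a {4, 1} DE_INT64 tensor from a caller-owned buffer.
  Status rc = de::Tensor::CreateFromMemory(de::TensorShape({4, 1}), de::DataType(DataType::DE_INT64),
                                           (unsigned char *)payload, &t);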

View File

@@ -75,7 +75,8 @@ TEST_F(MindDataTestCacheOp, TestCacheServer) {
   EXPECT_TRUE(rc.IsOk());
   // Create a tensor, take a snapshot and restore it back, and compare.
-  std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 3}), DataType(DataType::DE_UINT64));
+  std::shared_ptr<Tensor> t;
+  Tensor::CreateEmpty(TensorShape({2, 3}), DataType(DataType::DE_UINT64), &t);
   t->SetItemAt<uint64_t>({0, 0}, 1);
   t->SetItemAt<uint64_t>({0, 1}, 2);
   t->SetItemAt<uint64_t>({0, 2}, 3);
@@ -129,7 +130,8 @@ TEST_F(MindDataTestCacheOp, TestConcurrencyRequest) {
   rc = myClient.CreateCache(1, true);
   EXPECT_TRUE(rc.IsOk());
   std::cout << myClient << std::endl;
-  std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 3}), DataType(DataType::DE_UINT64));
+  std::shared_ptr<Tensor> t;
+  Tensor::CreateEmpty(TensorShape({2, 3}), DataType(DataType::DE_UINT64), &t);
   t->SetItemAt<uint64_t>({0, 0}, 1);
   t->SetItemAt<uint64_t>({0, 1}, 2);
   t->SetItemAt<uint64_t>({0, 2}, 3);
@@ -403,11 +405,7 @@ TEST_F(MindDataTestCacheOp, TestImageFolderCacheMerge) {
   // replace it with the required tree structures for cache lookup op and cache merge op.
   std::shared_ptr<CacheOp> myCacheOp;
-  rc = CacheOp::Builder()
-         .SetNumWorkers(4)
-         .SetClient(myClient)
-         .SetRowsPerBuffer(3)
-         .Build(&myCacheOp);
+  rc = CacheOp::Builder().SetNumWorkers(4).SetClient(myClient).SetRowsPerBuffer(3).Build(&myCacheOp);

   std::shared_ptr<ImageFolderOp> so;
   ImageFolderOp::Builder builder;

View File

@@ -36,7 +36,7 @@ TEST_F(MindDataTestChannelSwap, TestOp) {
   int size_buffer = s[0] * s[1] * s[2];
   std::unique_ptr<uchar[]> output_buffer(new uchar[size_buffer]);
-  std::shared_ptr<Tensor> output_tensor(new Tensor(s, DataType(DataType::DE_UINT8)));
+  std::shared_ptr<Tensor> output_tensor;

   // Decoding
   std::unique_ptr<HwcToChwOp> op(new HwcToChwOp());

View File

@@ -163,8 +163,11 @@ void BBoxOpCommon::CompareActualAndExpected(const std::string &op_name) {
     // after comparison is done remove temporary file
     EXPECT_TRUE(remove(actual_path.c_str()) == 0);
     // compare using ==operator by Tensor
+    std::shared_ptr<CVTensor> expect_img_t, actual_img_t;
+    CVTensor::CreateFromMat(expect_img, &expect_img_t);
+    CVTensor::CreateFromMat(actual_img, &actual_img_t);
     if (actual_img.data) {
-      EXPECT_EQ(CVTensor(expect_img) == CVTensor(actual_img), true);
+      EXPECT_EQ(*expect_img_t == *actual_img_t, true);
     } else {
       MS_LOG(ERROR) << "Not pass verification! Image data is null.";
       EXPECT_EQ(0, 1);
@@ -223,7 +226,7 @@ bool BBoxOpCommon::LoadAnnotationFile(const std::string &path, std::shared_ptr<T
     object = object->NextSiblingElement("object");  // Read next BBox if exists
   }
   std::shared_ptr<Tensor> ret_value;
-  Status s = Tensor::CreateTensor(&ret_value, return_value_list, TensorShape({bbox_count, bbox_val_count}));
+  Status s = Tensor::CreateFromVector(return_value_list, TensorShape({bbox_count, bbox_val_count}), &ret_value);
   EXPECT_TRUE(s.IsOk());
   (*target_BBox) = ret_value;  // load bbox from file into return
   return true;

View File

@@ -52,9 +52,11 @@ std::string CVOpCommon::GetFilename() {

 void CVOpCommon::GetInputImage(std::string filename) {
   try {
-    Tensor::CreateTensor(&raw_input_tensor_, filename);
+    Tensor::CreateFromFile(filename, &raw_input_tensor_);
     raw_cv_image_ = cv::imread(filename, cv::ImreadModes::IMREAD_COLOR);
-    input_tensor_ = std::dynamic_pointer_cast<Tensor>(std::make_shared<CVTensor>(raw_cv_image_));
+    std::shared_ptr<CVTensor> input_cv_tensor;
+    CVTensor::CreateFromMat(raw_cv_image_, &input_cv_tensor);
+    input_tensor_ = std::dynamic_pointer_cast<Tensor>(input_cv_tensor);
     SwapRedAndBlue(input_tensor_, &input_tensor_);
     if (raw_cv_image_.data) {
       MS_LOG(INFO) << "Reading was successful. Height:" << raw_cv_image_.rows << " Width: " << raw_cv_image_.cols

View File

@@ -29,14 +29,14 @@ class MindDataTestConcatenateOp : public UT::Common {
 TEST_F(MindDataTestConcatenateOp, TestOp) {
   MS_LOG(INFO) << "Doing MindDataTestConcatenate-TestOp.";
-  uint64_t labels[3] = {1, 1, 2};
+  std::vector<uint64_t> labels = {1, 1, 2};
   TensorShape shape({3});
-  std::shared_ptr<Tensor> input =
-    std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(labels));
+  std::shared_ptr<Tensor> input;
+  Tensor::CreateFromVector(labels, &input);
-  uint64_t append_labels[3] = {4, 4, 4};
+  std::vector<uint64_t> append_labels = {4, 4, 4};
-  std::shared_ptr<Tensor> append =
-    std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(append_labels));
+  std::shared_ptr<Tensor> append;
+  Tensor::CreateFromVector(append_labels, &append);
   std::shared_ptr<Tensor> output;
   std::unique_ptr<ConcatenateOp> op(new ConcatenateOp(0, nullptr, append));
@@ -44,10 +44,11 @@ TEST_F(MindDataTestConcatenateOp, TestOp) {
   in.push_back(input);
   TensorRow out_row;
   Status s = op->Compute(in, &out_row);
-  uint64_t out[6] = {1, 1, 2, 4, 4, 4};
-  std::shared_ptr<Tensor> expected =
-    std::make_shared<Tensor>(TensorShape{6}, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(out));
+  std::vector<uint64_t> out = {1, 1, 2, 4, 4, 4};
+  std::shared_ptr<Tensor> expected;
+  Tensor::CreateFromVector(out, &expected);

   output = out_row[0];
   EXPECT_TRUE(s.IsOk());
   ASSERT_TRUE(output->shape() == expected->shape());

View File

@@ -32,9 +32,9 @@ class MindDataTestDuplicateOp : public UT::Common {
 TEST_F(MindDataTestDuplicateOp, Basics) {
   std::shared_ptr<Tensor> t;
-  Tensor::CreateTensor(&t, std::vector<uint32_t>({1, 2, 3, 4, 5, 6}));
+  Tensor::CreateFromVector(std::vector<uint32_t>({1, 2, 3, 4, 5, 6}), &t);
   std::shared_ptr<Tensor> v;
-  Tensor::CreateTensor(&v, std::vector<uint32_t>({3}), TensorShape::CreateScalar());
+  Tensor::CreateFromVector(std::vector<uint32_t>({3}), TensorShape::CreateScalar(), &v);
   std::shared_ptr<DuplicateOp> op = std::make_shared<DuplicateOp>();
   TensorRow in;
   in.push_back(t);

View File

@@ -29,23 +29,20 @@ class MindDataTestFillOp : public UT::Common {
 TEST_F(MindDataTestFillOp, TestOp) {
   MS_LOG(INFO) << "Doing MindDataTestFillOp-TestOp.";
-  uint64_t labels[3] = {1, 1, 2};
-  TensorShape shape({3});
-  std::shared_ptr<Tensor> input =
-    std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(labels));
+  std::vector<uint64_t> labels = {1, 1, 2};
+  std::shared_ptr<Tensor> input;
+  Tensor::CreateFromVector(labels, &input);

-  TensorShape fill_shape({});
-  std::shared_ptr<Tensor> fill_tensor = std::make_shared<Tensor>(fill_shape, DataType(DataType::DE_UINT64));
-  fill_tensor->SetItemAt<uint64_t>({}, 4);
+  std::shared_ptr<Tensor> fill_tensor;
+  Tensor::CreateScalar<uint64_t>(4, &fill_tensor);

   std::shared_ptr<Tensor> output;
   std::unique_ptr<FillOp> op(new FillOp(fill_tensor));
   Status s = op->Compute(input, &output);
-  uint64_t out[3] = {4, 4, 4};
-  std::shared_ptr<Tensor> expected =
-    std::make_shared<Tensor>(TensorShape{3}, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(out));
+  std::vector<uint64_t> out = {4, 4, 4};
+  std::shared_ptr<Tensor> expected;
+  Tensor::CreateFromVector(out, &expected);

   EXPECT_TRUE(s.IsOk());
   ASSERT_TRUE(output->shape() == expected->shape());
@@ -59,23 +56,20 @@ TEST_F(MindDataTestFillOp, TestOp) {
 TEST_F(MindDataTestFillOp, TestCasting) {
   MS_LOG(INFO) << "Doing MindDataTestFillOp-TestCasting.";
-  uint64_t labels[3] = {0, 1, 2};
-  TensorShape shape({3});
-  std::shared_ptr<Tensor> input =
-    std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(labels));
+  std::vector<uint64_t> labels = {0, 1, 2};
+  std::shared_ptr<Tensor> input;
+  Tensor::CreateFromVector(labels, &input);

-  TensorShape fill_shape({});
-  std::shared_ptr<Tensor> fill_tensor = std::make_shared<Tensor>(fill_shape, DataType(DataType::DE_FLOAT32));
-  fill_tensor->SetItemAt<float>({}, 2.0);
+  std::shared_ptr<Tensor> fill_tensor;
+  Tensor::CreateScalar<float>(2.0, &fill_tensor);

   std::shared_ptr<Tensor> output;
   std::unique_ptr<FillOp> op(new FillOp(fill_tensor));
   Status s = op->Compute(input, &output);
-  uint64_t out[3] = {2, 2, 2};
-  std::shared_ptr<Tensor> expected =
-    std::make_shared<Tensor>(TensorShape{3}, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(out));
+  std::vector<uint64_t> out = {2, 2, 2};
+  std::shared_ptr<Tensor> expected;
+  Tensor::CreateFromVector(out, &expected);

   ASSERT_TRUE(output->shape() == expected->shape());
   ASSERT_TRUE(output->type() == expected->type());
@@ -90,15 +84,15 @@ TEST_F(MindDataTestFillOp, TestCasting) {
 TEST_F(MindDataTestFillOp, ScalarFill) {
   MS_LOG(INFO) << "Doing MindDataTestFillOp-ScalarFill.";
-  uint64_t labels[3] = {0, 1, 2};
-  TensorShape shape({3});
-  std::shared_ptr<Tensor> input =
-    std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(labels));
+  std::vector<uint64_t> labels = {0, 1, 2};
+  std::shared_ptr<Tensor> input;
+  Tensor::CreateFromVector(labels, &input);

   TensorShape fill_shape({2});
-  uint64_t fill_labels[3] = {0, 1};
-  std::shared_ptr<Tensor> fill_tensor =
-    std::make_shared<Tensor>(fill_shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(fill_labels));
+  std::vector<uint64_t> fill_labels = {0, 1};
+  std::shared_ptr<Tensor> fill_tensor;
+  Tensor::CreateFromVector(fill_labels, &fill_tensor);
   std::shared_ptr<Tensor> output;
   std::unique_ptr<FillOp> op(new FillOp(fill_tensor));
   Status s = op->Compute(input, &output);
@@ -112,12 +106,11 @@ TEST_F(MindDataTestFillOp, ScalarFill) {
 TEST_F(MindDataTestFillOp, StringFill) {
   MS_LOG(INFO) << "Doing MindDataTestFillOp-StringFill.";
   std::vector<std::string> strings = {"xyzzy", "plugh", "abracadabra"};
-  TensorShape shape({3});
-  std::shared_ptr<Tensor> input = std::make_shared<Tensor>(strings, shape);
+  std::shared_ptr<Tensor> input;
+  Tensor::CreateFromVector(strings, &input);

-  TensorShape fill_shape({});
-  std::string fill_string = "hello";
-  std::shared_ptr<Tensor> fill_tensor = std::make_shared<Tensor>(fill_string);
+  std::shared_ptr<Tensor> fill_tensor;
+  Tensor::CreateScalar<std::string>("hello", &fill_tensor);

   std::shared_ptr<Tensor> output;
@@ -125,8 +118,8 @@ TEST_F(MindDataTestFillOp, StringFill) {
   Status s = op->Compute(input, &output);
   std::vector<std::string> expected_strings = {"hello", "hello", "hello"};
-  TensorShape expected_shape({3});
-  std::shared_ptr<Tensor> expected = std::make_shared<Tensor>(expected_strings, expected_shape);
+  std::shared_ptr<Tensor> expected;
+  Tensor::CreateFromVector(expected_strings, &expected);

   EXPECT_TRUE(s.IsOk());
   ASSERT_TRUE(output->shape() == expected->shape());
@ -142,12 +135,11 @@ TEST_F(MindDataTestFillOp, StringFill) {
TEST_F(MindDataTestFillOp, NumericToString) { TEST_F(MindDataTestFillOp, NumericToString) {
MS_LOG(INFO) << "Doing MindDataTestFillOp-NumericToString."; MS_LOG(INFO) << "Doing MindDataTestFillOp-NumericToString.";
std::vector<std::string> strings = {"xyzzy", "plugh", "abracadabra"}; std::vector<std::string> strings = {"xyzzy", "plugh", "abracadabra"};
TensorShape shape({3}); std::shared_ptr<Tensor> input;
std::shared_ptr<Tensor> input = std::make_shared<Tensor>(strings, shape); Tensor::CreateFromVector(strings, &input);
TensorShape fill_shape({}); std::shared_ptr<Tensor> fill_tensor;
std::shared_ptr<Tensor> fill_tensor = std::make_shared<Tensor>(fill_shape, DataType(DataType::DE_FLOAT32)); Tensor::CreateScalar<float>(2.0, &fill_tensor);
fill_tensor->SetItemAt<float>({}, 2.0);
std::shared_ptr<Tensor> output; std::shared_ptr<Tensor> output;
@ -162,14 +154,12 @@ TEST_F(MindDataTestFillOp, NumericToString) {
TEST_F(MindDataTestFillOp, StringToNumeric) { TEST_F(MindDataTestFillOp, StringToNumeric) {
MS_LOG(INFO) << "Doing MindDataTestFillOp-StringToNumeric."; MS_LOG(INFO) << "Doing MindDataTestFillOp-StringToNumeric.";
uint64_t labels[3] = {0, 1, 2}; std::vector<uint64_t> labels = {0, 1, 2};
TensorShape shape({3}); std::shared_ptr<Tensor> input;
std::shared_ptr<Tensor> input = Tensor::CreateFromVector(labels, &input);
std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(labels));
TensorShape fill_shape({}); std::shared_ptr<Tensor> fill_tensor;
std::string fill_string = "hello"; Tensor::CreateScalar<std::string>("hello", &fill_tensor);
std::shared_ptr<Tensor> fill_tensor = std::make_shared<Tensor>(fill_string);
std::shared_ptr<Tensor> output; std::shared_ptr<Tensor> output;

View File

@ -68,8 +68,7 @@ std::shared_ptr<ImageFolderOp> ImageFolder(int64_t num_works, int64_t rows, int6
Status Create1DTensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements, unsigned char *data = nullptr, Status Create1DTensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements, unsigned char *data = nullptr,
DataType::Type data_type = DataType::DE_UINT32) { DataType::Type data_type = DataType::DE_UINT32) {
TensorShape shape(std::vector<int64_t>(1, num_elements)); TensorShape shape(std::vector<int64_t>(1, num_elements));
RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, TensorImpl::kFlexible, shape, DataType(data_type), data)); RETURN_IF_NOT_OK(Tensor::CreateFromMemory(shape, DataType(data_type), data, sample_ids));
(*sample_ids)->AllocateBuffer((*sample_ids)->SizeInBytes()); // allocate memory in case user forgets!
return Status::OK(); return Status::OK();
} }
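
The old call built a kFlexible tensor and then needed an explicit AllocateBuffer() in case data was nullptr; the new CreateFromMemory takes the source pointer directly. Whether it allocates internally when data == nullptr is not shown in this diff — the removed AllocateBuffer() line suggests it does, but that reading is an assumption. A sketch of the migrated helper (Make1DIds is a hypothetical name):

    // Mirror of Create1DTensor above: wrap (or, assumed, allocate when
    // data == nullptr) a 1-D uint32 sample-id tensor.
    Status Make1DIds(std::shared_ptr<Tensor> *out, int64_t n, unsigned char *data = nullptr) {
      TensorShape shape({n});
      return Tensor::CreateFromMemory(shape, DataType(DataType::DE_UINT32), data, out);
    }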

View File

@ -42,7 +42,8 @@ TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opFuntions) {
TensorRow input, output; TensorRow input, output;
std::unique_ptr<JiebaTokenizerOp> op(new JiebaTokenizerOp(hmm_path, mp_path)); std::unique_ptr<JiebaTokenizerOp> op(new JiebaTokenizerOp(hmm_path, mp_path));
std::shared_ptr<Tensor> input_tensor = std::make_shared<Tensor>("今天天气太好了我们一起去外面玩吧"); std::shared_ptr<Tensor> input_tensor;
Tensor::CreateScalar<std::string>("今天天气太好了我们一起去外面玩吧", &input_tensor);
input.push_back(input_tensor); input.push_back(input_tensor);
Status s = op->Compute(input, &output); Status s = op->Compute(input, &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
@ -66,7 +67,8 @@ TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opAdd) {
std::unique_ptr<JiebaTokenizerOp> op(new JiebaTokenizerOp(hmm_path, mp_path)); std::unique_ptr<JiebaTokenizerOp> op(new JiebaTokenizerOp(hmm_path, mp_path));
op->AddWord("男默女泪"); op->AddWord("男默女泪");
std::shared_ptr<Tensor> input_tensor = std::make_shared<Tensor>("男默女泪"); std::shared_ptr<Tensor> input_tensor;
Tensor::CreateScalar<std::string>("男默女泪", &input_tensor);
input.push_back(input_tensor); input.push_back(input_tensor);
Status s = op->Compute(input, &output); Status s = op->Compute(input, &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
@ -84,7 +86,8 @@ TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opEmpty) {
std::unique_ptr<JiebaTokenizerOp> op(new JiebaTokenizerOp(hmm_path, mp_path)); std::unique_ptr<JiebaTokenizerOp> op(new JiebaTokenizerOp(hmm_path, mp_path));
op->AddWord("男默女泪"); op->AddWord("男默女泪");
std::shared_ptr<Tensor> input_tensor = std::make_shared<Tensor>(""); std::shared_ptr<Tensor> input_tensor;
Tensor::CreateScalar<std::string>("", &input_tensor);
input.push_back(input_tensor); input.push_back(input_tensor);
Status s = op->Compute(input, &output); Status s = op->Compute(input, &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
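
Scalar string tensors no longer come from a std::make_shared<Tensor>("...") constructor; they go through the templated CreateScalar factory, which also covers the empty-string case in TestJieba_opEmpty. A minimal sketch of the pattern these tests now share:

    std::shared_ptr<Tensor> text;
    Tensor::CreateScalar<std::string>("男默女泪", &text);  // rank-0 string tensor
    TensorRow row;
    row.push_back(text);  // feed a single-element row to the tokenizer op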

View File

@ -71,9 +71,9 @@ TEST_F(MindDataTestManifest, TestSequentialManifestWithRepeat) {
di.GetNextAsMap(&tensor_map); di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk()); EXPECT_TRUE(rc.IsOk());
uint64_t i = 0; uint64_t i = 0;
uint32_t label = 0; int32_t label = 0;
while (tensor_map.size() != 0) { while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<uint32_t>(&label, {}); tensor_map["label"]->GetItemAt<int32_t>(&label, {});
EXPECT_TRUE(res[i] == label); EXPECT_TRUE(res[i] == label);
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
i++; i++;
@ -101,9 +101,9 @@ TEST_F(MindDataTestManifest, TestSubsetRandomSamplerManifest) {
rc = di.GetNextAsMap(&tensor_map); rc = di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk()); EXPECT_TRUE(rc.IsOk());
uint64_t i = 0; uint64_t i = 0;
uint32_t label = 0; int32_t label = 0;
while (tensor_map.size() != 0) { while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<uint32_t>(&label, {}); tensor_map["label"]->GetItemAt<int32_t>(&label, {});
i++; i++;
di.GetNextAsMap(&tensor_map); di.GetNextAsMap(&tensor_map);
EXPECT_EQ(label, 1); EXPECT_EQ(label, 1);
@ -131,9 +131,9 @@ TEST_F(MindDataTestManifest, MindDataTestManifestClassIndex) {
di.GetNextAsMap(&tensor_map); di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk()); EXPECT_TRUE(rc.IsOk());
uint64_t i = 0; uint64_t i = 0;
uint32_t label = 0; int32_t label = 0;
while (tensor_map.size() != 0) { while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<uint32_t>(&label, {}); tensor_map["label"]->GetItemAt<int32_t>(&label, {});
EXPECT_TRUE(label == res[i]); EXPECT_TRUE(label == res[i]);
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
i++; i++;
@ -160,9 +160,9 @@ TEST_F(MindDataTestManifest, MindDataTestManifestNumSamples) {
di.GetNextAsMap(&tensor_map); di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk()); EXPECT_TRUE(rc.IsOk());
uint64_t i = 0; uint64_t i = 0;
uint32_t label = 0; int32_t label = 0;
while (tensor_map.size() != 0) { while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<uint32_t>(&label, {}); tensor_map["label"]->GetItemAt<int32_t>(&label, {});
EXPECT_TRUE(0 == label); EXPECT_TRUE(0 == label);
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
i++; i++;
@ -176,7 +176,7 @@ TEST_F(MindDataTestManifest, MindDataTestManifestEval) {
std::string file = datasets_root_path_ + "/testManifestData/cpp.json"; std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
int64_t num_samples = 1; int64_t num_samples = 1;
int64_t start_index = 0; int64_t start_index = 0;
auto seq_sampler = std::make_shared<SequentialSampler>(num_samples, start_index); auto seq_sampler = std::make_shared<SequentialSampler>(num_samples, start_index);
auto tree = Build({Manifest(16, 2, 32, file, "eval", std::move(seq_sampler), {})}); auto tree = Build({Manifest(16, 2, 32, file, "eval", std::move(seq_sampler), {})});
tree->Prepare(); tree->Prepare();
Status rc = tree->Launch(); Status rc = tree->Launch();
@ -189,9 +189,9 @@ TEST_F(MindDataTestManifest, MindDataTestManifestEval) {
di.GetNextAsMap(&tensor_map); di.GetNextAsMap(&tensor_map);
EXPECT_TRUE(rc.IsOk()); EXPECT_TRUE(rc.IsOk());
uint64_t i = 0; uint64_t i = 0;
uint32_t label = 0; int32_t label = 0;
while (tensor_map.size() != 0) { while (tensor_map.size() != 0) {
tensor_map["label"]->GetItemAt<uint32_t>(&label, {}); tensor_map["label"]->GetItemAt<int32_t>(&label, {});
EXPECT_TRUE(0 == label); EXPECT_TRUE(0 == label);
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n"; MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
i++; i++;
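
All four loops above switch the local label from uint32_t to int32_t and read it with the matching GetItemAt instantiation; presumably the label column produced by the Manifest op is now signed 32-bit, so the template argument must agree with the stored element type (an inference from the test change, not stated in this diff):

    int32_t label = 0;
    // {} indexes a rank-0 tensor; GetItemAt<T> must match the element type.
    tensor_map["label"]->GetItemAt<int32_t>(&label, {});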

View File

@ -38,9 +38,9 @@ class MindDataTestMaskOp : public UT::Common {
TEST_F(MindDataTestMaskOp, Basics) { TEST_F(MindDataTestMaskOp, Basics) {
std::shared_ptr<Tensor> t; std::shared_ptr<Tensor> t;
Tensor::CreateTensor(&t, std::vector<uint32_t>({1, 2, 3, 4, 5, 6})); Tensor::CreateFromVector(std::vector<uint32_t>({1, 2, 3, 4, 5, 6}), &t);
std::shared_ptr<Tensor> v; std::shared_ptr<Tensor> v;
Tensor::CreateTensor(&v, std::vector<uint32_t>({3}), TensorShape::CreateScalar()); Tensor::CreateFromVector(std::vector<uint32_t>({3}), TensorShape::CreateScalar(), &v);
std::shared_ptr<MaskOp> op = std::make_shared<MaskOp>(RelationalOp::kEqual, v, DataType(DataType::DE_UINT16)); std::shared_ptr<MaskOp> op = std::make_shared<MaskOp>(RelationalOp::kEqual, v, DataType(DataType::DE_UINT16));
std::shared_ptr<Tensor> out; std::shared_ptr<Tensor> out;
ASSERT_TRUE(op->Compute(t, &out).IsOk()); ASSERT_TRUE(op->Compute(t, &out).IsOk());
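
Reading the arguments, MaskOp(RelationalOp::kEqual, v, DataType::DE_UINT16) presumably compares each element of t against the scalar v and emits the boolean result cast to the requested uint16 type, so for t = {1, 2, 3, 4, 5, 6} and v = 3 the output would be {0, 0, 1, 0, 0, 0}. The expected values are outside this hunk, so that output is an inference from the op's name and arguments rather than something this diff asserts.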

View File

@ -29,19 +29,17 @@ class MindDataTestOneHotOp : public UT::Common {
TEST_F(MindDataTestOneHotOp, TestOp) { TEST_F(MindDataTestOneHotOp, TestOp) {
MS_LOG(INFO) << "Doing MindDataTestOneHotOp."; MS_LOG(INFO) << "Doing MindDataTestOneHotOp.";
uint64_t labels[3] = {0, 1, 2}; std::vector<uint64_t> labels = {0, 1, 2};
TensorShape shape({3}); std::shared_ptr<Tensor> input;
std::shared_ptr<Tensor> input = std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64), Tensor::CreateFromVector(labels, &input);
reinterpret_cast <unsigned char *>(labels));
std::shared_ptr<Tensor> output; std::shared_ptr<Tensor> output;
std::unique_ptr<OneHotOp> op(new OneHotOp(5)); std::unique_ptr<OneHotOp> op(new OneHotOp(5));
Status s = op->Compute(input, &output); Status s = op->Compute(input, &output);
uint64_t out[15] = {1, 0, 0, 0, 0, std::vector<uint64_t> out = {1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0};
0, 1, 0, 0, 0, std::shared_ptr<Tensor> expected;
0, 0, 1, 0, 0}; Tensor::CreateFromVector(out, TensorShape{3, 5}, &expected);
std::shared_ptr<Tensor> expected = std::make_shared<Tensor>(TensorShape{3, 5}, DataType(DataType::DE_UINT64),
reinterpret_cast <unsigned char *>(out));
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
ASSERT_TRUE(output->shape() == expected->shape()); ASSERT_TRUE(output->shape() == expected->shape());
ASSERT_TRUE(output->type() == expected->type()); ASSERT_TRUE(output->type() == expected->type());
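
CreateFromVector with an explicit TensorShape lays flat row-major data out in the given shape, which is how the expected one-hot matrix is built: 3 labels at depth 5 give shape {3, 5}, with the 1 in row i landing at column labels[i]:

    std::vector<uint64_t> out = {1, 0, 0, 0, 0,   // label 0
                                 0, 1, 0, 0, 0,   // label 1
                                 0, 0, 1, 0, 0};  // label 2
    std::shared_ptr<Tensor> expected;
    Tensor::CreateFromVector(out, TensorShape{3, 5}, &expected);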

View File

@ -35,44 +35,40 @@ TEST_F(MindDataTestPadEndOp, TestOp) {
TensorShape pad_data_shape({1}); TensorShape pad_data_shape({1});
// prepare input tensor // prepare input tensor
float_t orig1[4] = {1, 1, 1, 1}; std::vector<float> orig1 = {1, 1, 1, 1};
TensorShape input_shape1({2, 2}); TensorShape input_shape1({2, 2});
std::vector<TensorShape> input_shape1_vector = {input_shape1}; std::vector<TensorShape> input_shape1_vector = {input_shape1};
std::shared_ptr<Tensor> input1 = std::shared_ptr<Tensor> input1;
std::make_shared<Tensor>(input_shape1, DataType(DataType::DE_FLOAT32), reinterpret_cast<unsigned char *>(orig1)); Tensor::CreateFromVector(orig1, input_shape1, &input1);
// pad_shape // pad_shape
TensorShape pad_shape1[3] = {TensorShape({3, 3}), TensorShape({2, 4}), TensorShape({4, 2})}; TensorShape pad_shape1[3] = {TensorShape({3, 3}), TensorShape({2, 4}), TensorShape({4, 2})};
// value to pad // value to pad
float_t pad_data1[3][1] = {0, 3.5, 3.5}; std::vector<std::vector<float>> pad_data1 = {{0}, {3.5}, {3.5}};
std::shared_ptr<Tensor> expected1[3]; std::shared_ptr<Tensor> expected1[3];
// expected tensor output for testunit 1 // expected tensor output for testunit 1
float_t out1[9] = {1, 1, 0, 1, 1, 0, 0, 0, 0}; std::vector<float> out1 = {1, 1, 0, 1, 1, 0, 0, 0, 0};
Tensor::CreateFromVector(out1, pad_shape1[0], &(expected1[0]));
expected1[0] =
std::make_shared<Tensor>(pad_shape1[0], DataType(DataType::DE_FLOAT32), reinterpret_cast<unsigned char *>(out1));
// expected tensor output for testunit 2 // expected tensor output for testunit 2
float_t out2[8] = {1, 1, 3.5, 3.5, 1, 1, 3.5, 3.5}; std::vector<float> out2 = {1, 1, 3.5, 3.5, 1, 1, 3.5, 3.5};
Tensor::CreateFromVector(out2, pad_shape1[1], &(expected1[1]));
expected1[1] =
std::make_shared<Tensor>(pad_shape1[1], DataType(DataType::DE_FLOAT32), reinterpret_cast<unsigned char *>(out2));
// expected tensor output for testunit 3 // expected tensor output for testunit 3
float_t out3[8] = {1, 1, 1, 1, 3.5, 3.5, 3.5, 3.5}; std::vector<float> out3 = {1, 1, 1, 1, 3.5, 3.5, 3.5, 3.5};
Tensor::CreateFromVector(out3, pad_shape1[2], &(expected1[2]));
expected1[2] =
std::make_shared<Tensor>(pad_shape1[2], DataType(DataType::DE_FLOAT32), reinterpret_cast<unsigned char *>(out3));
// run the PadEndOp // run the PadEndOp
for (auto i = 0; i < 3; i++) { for (auto i = 0; i < 3; i++) {
std::shared_ptr<Tensor> output; std::shared_ptr<Tensor> output;
std::vector<TensorShape> output_shape = {TensorShape({})}; std::vector<TensorShape> output_shape = {TensorShape({})};
std::shared_ptr<Tensor> pad_value1 = std::make_shared<Tensor>(pad_data_shape, DataType(DataType::DE_FLOAT32),
reinterpret_cast<unsigned char *>(pad_data1[i])); std::shared_ptr<Tensor> pad_value1;
Tensor::CreateFromVector(pad_data1[i], pad_data_shape, &pad_value1);
std::unique_ptr<PadEndOp> op(new PadEndOp(pad_shape1[i], pad_value1)); std::unique_ptr<PadEndOp> op(new PadEndOp(pad_shape1[i], pad_value1));
Status s = op->Compute(input1, &output); Status s = op->Compute(input1, &output);
@ -96,7 +92,7 @@ TEST_F(MindDataTestPadEndOp, TestOp) {
TensorShape input_shape2({2}); TensorShape input_shape2({2});
std::vector<TensorShape> input_shape2_vector = {input_shape2}; std::vector<TensorShape> input_shape2_vector = {input_shape2};
std::shared_ptr<Tensor> input2; std::shared_ptr<Tensor> input2;
Tensor::CreateTensor(&input2, orig2, input_shape2); Tensor::CreateFromVector(orig2, input_shape2, &input2);
// pad_shape // pad_shape
TensorShape pad_shape2[3] = {TensorShape({5}), TensorShape({2}), TensorShape({10})}; TensorShape pad_shape2[3] = {TensorShape({5}), TensorShape({2}), TensorShape({10})};
@ -112,7 +108,7 @@ TEST_F(MindDataTestPadEndOp, TestOp) {
for (auto i = 0; i < 3; i++) { for (auto i = 0; i < 3; i++) {
// pad value // pad value
Tensor::CreateTensor(&pad_value2[i], pad_data2[i], pad_data_shape); Tensor::CreateFromVector(pad_data2[i], pad_data_shape, &pad_value2[i]);
std::shared_ptr<Tensor> output; std::shared_ptr<Tensor> output;
std::vector<TensorShape> output_shape = {TensorShape({})}; std::vector<TensorShape> output_shape = {TensorShape({})};
@ -121,7 +117,7 @@ TEST_F(MindDataTestPadEndOp, TestOp) {
Status s = op->Compute(input2, &output); Status s = op->Compute(input2, &output);
Tensor::CreateTensor(&expected2[i], outstring[i], pad_shape2[i]); Tensor::CreateFromVector(outstring[i], pad_shape2[i], &expected2[i]);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
ASSERT_TRUE(output->shape() == expected2[i]->shape()); ASSERT_TRUE(output->shape() == expected2[i]->shape());
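
Reading the three expected tensors above: PadEndOp keeps the input values at their original indices within the target shape and fills every grown cell with the pad value. A sketch of the first case under those assumptions — a 2x2 of ones padded to 3x3 with 0:

    // [[1, 1],      [[1, 1, 0],
    //  [1, 1]]  ->   [1, 1, 0],
    //                [0, 0, 0]]
    std::shared_ptr<Tensor> pad_value;
    Tensor::CreateFromVector(std::vector<float>{0}, TensorShape({1}), &pad_value);
    std::unique_ptr<PadEndOp> op(new PadEndOp(TensorShape({3, 3}), pad_value));
    std::shared_ptr<Tensor> padded;
    Status s = op->Compute(input1, &padded);  // input1 as defined in the test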

View File

@ -93,7 +93,6 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromDatasetFuntions) {
rc = di.FetchNextTensorRow(&tensor_list); rc = di.FetchNextTensorRow(&tensor_list);
} }
ASSERT_TRUE(rc.IsOk()); ASSERT_TRUE(rc.IsOk());
} }
TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromFileFuntions) { TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromFileFuntions) {
@ -166,9 +165,10 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceTokenizerFuntions) {
rc = di.FetchNextTensorRow(&tensor_list); rc = di.FetchNextTensorRow(&tensor_list);
} }
std::shared_ptr<Tensor> output_tensor; std::shared_ptr<Tensor> output_tensor;
std::unique_ptr<SentencePieceTokenizerOp> op(new SentencePieceTokenizerOp(spm, std::unique_ptr<SentencePieceTokenizerOp> op(
SPieceTokenizerLoadType::kModel, SPieceTokenizerOutType::kString)); new SentencePieceTokenizerOp(spm, SPieceTokenizerLoadType::kModel, SPieceTokenizerOutType::kString));
std::shared_ptr<Tensor> input_tensor = std::make_shared<Tensor>("I saw a girl with a telescope."); std::shared_ptr<Tensor> input_tensor;
Tensor::CreateScalar<std::string>("I saw a girl with a telescope.", &input_tensor);
Status s = op->Compute(input_tensor, &output_tensor); Status s = op->Compute(input_tensor, &output_tensor);
std::vector<std::string> expect; std::vector<std::string> expect;

View File

@ -31,15 +31,17 @@ TEST_F(MindDataTestSlidingWindowOp, Compute) {
MS_LOG(INFO) << "Doing MindDataTestSlidingWindowOp->Compute."; MS_LOG(INFO) << "Doing MindDataTestSlidingWindowOp->Compute.";
std::vector<std::string> strings = {"one", "two", "three", "four", "five", "six", "seven", "eight"}; std::vector<std::string> strings = {"one", "two", "three", "four", "five", "six", "seven", "eight"};
TensorShape shape({static_cast<dsize_t>(strings.size())}); TensorShape shape({static_cast<dsize_t>(strings.size())});
std::shared_ptr<Tensor> input = std::make_shared<Tensor>(strings, shape); std::shared_ptr<Tensor> input;
Tensor::CreateFromVector(strings, shape, &input);
std::shared_ptr<Tensor> output; std::shared_ptr<Tensor> output;
std::unique_ptr<SlidingWindowOp> op(new SlidingWindowOp(3, 0)); std::unique_ptr<SlidingWindowOp> op(new SlidingWindowOp(3, 0));
Status s = op->Compute(input, &output); Status s = op->Compute(input, &output);
std::vector<std::string> out = {"one", "two", "three", "two", "three", "four", "three", "four", "five", std::vector<std::string> out = {"one", "two", "three", "two", "three", "four", "three", "four", "five",
"four", "five", "six", "five", "six", "seven", "six", "seven", "eight"}; "four", "five", "six", "five", "six", "seven", "six", "seven", "eight"};
std::shared_ptr<Tensor> expected = std::make_shared<Tensor>(out, TensorShape({6, 3})); std::shared_ptr<Tensor> expected;
Tensor::CreateFromVector(out, TensorShape({6, 3}), &expected);
ASSERT_TRUE(output->shape() == expected->shape()); ASSERT_TRUE(output->shape() == expected->shape());
ASSERT_TRUE(output->type() == expected->type()); ASSERT_TRUE(output->type() == expected->type());
@ -54,7 +56,8 @@ TEST_F(MindDataTestSlidingWindowOp, OutputShape) {
MS_LOG(INFO) << "Doing MindDataTestSlidingWindowOp->OutputShape."; MS_LOG(INFO) << "Doing MindDataTestSlidingWindowOp->OutputShape.";
std::vector<std::string> strings = {"one", "two", "three", "four", "five", "six", "seven", "eight"}; std::vector<std::string> strings = {"one", "two", "three", "four", "five", "six", "seven", "eight"};
TensorShape shape({static_cast<dsize_t>(strings.size())}); TensorShape shape({static_cast<dsize_t>(strings.size())});
std::shared_ptr<Tensor> input = std::make_shared<Tensor>(strings, shape); std::shared_ptr<Tensor> input;
Tensor::CreateFromVector(strings, shape, &input);
std::vector<TensorShape> input_shape = {input->shape()}; std::vector<TensorShape> input_shape = {input->shape()};
std::vector<TensorShape> output_shape = {TensorShape({})}; std::vector<TensorShape> output_shape = {TensorShape({})};
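
The shape assertion in Compute is plain window arithmetic: 8 tokens with window width 3 and stride 1 along axis 0 give 8 - 3 + 1 = 6 windows, hence the expected output shape {6, 3} (reading SlidingWindowOp(3, 0) as width 3 on axis 0, which matches the expected tensor above).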

View File

@ -30,8 +30,7 @@ using namespace mindspore::dataset;
Status CreateINT64Tensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements, unsigned char *data = nullptr) { Status CreateINT64Tensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements, unsigned char *data = nullptr) {
TensorShape shape(std::vector<int64_t>(1, num_elements)); TensorShape shape(std::vector<int64_t>(1, num_elements));
RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, TensorImpl::kFlexible, shape, DataType(DataType::DE_INT64), data)); RETURN_IF_NOT_OK(Tensor::CreateFromMemory(shape, DataType(DataType::DE_INT64), data, sample_ids));
(*sample_ids)->AllocateBuffer((*sample_ids)->SizeInBytes()); // allocate memory in case user forgets!
return Status::OK(); return Status::OK();
} }
@ -54,8 +53,7 @@ TEST_F(MindDataTestStandAloneSampler, TestDistributedSampler) {
{0, 17, 4, 10, 14, 8, 15}, {13, 9, 16, 3, 2, 19, 12}, {1, 11, 6, 18, 7, 5, 0}}; {0, 17, 4, 10, 14, 8, 15}, {13, 9, 16, 3, 2, 19, 12}, {1, 11, 6, 18, 7, 5, 0}};
for (int i = 0; i < 6; i++) { for (int i = 0; i < 6; i++) {
std::shared_ptr<Tensor> t; std::shared_ptr<Tensor> t;
Tensor::CreateTensor(&t, TensorImpl::kFlexible, TensorShape({7}), Tensor::CreateFromMemory(TensorShape({7}), DataType(DataType::DE_INT64), (unsigned char *)(res[i]), &t);
DataType(DataType::DE_INT64), (unsigned char *)(res[i]));
row.push_back(t); row.push_back(t);
} }
MockStorageOp mock(20); MockStorageOp mock(20);
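
CreateFromMemory also replaces the raw-pointer CreateTensor overload when the bytes already exist, as with the expected-id rows here. Whether the factory copies the buffer or aliases it is not visible in this diff, so treat the lifetime of res as significant. A sketch:

    int64_t ids[7] = {0, 17, 4, 10, 14, 8, 15};
    std::shared_ptr<Tensor> t;
    Tensor::CreateFromMemory(TensorShape({7}), DataType(DataType::DE_INT64),
                             reinterpret_cast<unsigned char *>(ids), &t);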

View File

@ -35,13 +35,15 @@ class MindDataTestStringTensorDE : public UT::Common {
}; };
TEST_F(MindDataTestStringTensorDE, Basics) { TEST_F(MindDataTestStringTensorDE, Basics) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>("Hi"); std::shared_ptr<Tensor> t;
Tensor::CreateScalar<std::string>("Hi", &t);
ASSERT_TRUE(t->shape() == TensorShape({})); ASSERT_TRUE(t->shape() == TensorShape({}));
std::string_view s = ""; std::string_view s = "";
t->GetItemAt(&s, {}); t->GetItemAt(&s, {});
ASSERT_TRUE(s == "Hi"); ASSERT_TRUE(s == "Hi");
std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(std::vector<std::string>{"Hi", "Bye"}); std::shared_ptr<Tensor> t2;
Tensor::CreateFromVector(std::vector<std::string>{"Hi", "Bye"}, &t2);
ASSERT_TRUE(t2->shape() == TensorShape({2})); ASSERT_TRUE(t2->shape() == TensorShape({2}));
t2->GetItemAt(&s, {0}); t2->GetItemAt(&s, {0});
ASSERT_TRUE(s == "Hi"); ASSERT_TRUE(s == "Hi");
@ -49,7 +51,9 @@ TEST_F(MindDataTestStringTensorDE, Basics) {
ASSERT_TRUE(s == "Bye"); ASSERT_TRUE(s == "Bye");
std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"}; std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"};
std::shared_ptr<Tensor> t3 = std::make_shared<Tensor>(strings, TensorShape({2, 3})); std::shared_ptr<Tensor> t3;
Tensor::CreateFromVector(strings, TensorShape({2, 3}), &t3);
ASSERT_TRUE(t3->shape() == TensorShape({2, 3})); ASSERT_TRUE(t3->shape() == TensorShape({2, 3}));
uint32_t index = 0; uint32_t index = 0;
for (uint32_t i = 0; i < 2; i++) { for (uint32_t i = 0; i < 2; i++) {
@ -62,8 +66,10 @@ TEST_F(MindDataTestStringTensorDE, Basics) {
} }
TEST_F(MindDataTestStringTensorDE, Basics2) { TEST_F(MindDataTestStringTensorDE, Basics2) {
std::shared_ptr<Tensor> t = std::shared_ptr<Tensor> t;
std::make_shared<Tensor>(std::vector<std::string>{"abc", "defg", "hi", "klmno", "123", "789"}, TensorShape({2, 3})); Tensor::CreateFromVector(std::vector<std::string>{"abc", "defg", "hi", "klmno", "123", "789"}, TensorShape({2, 3}),
&t);
ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 20 + 4); ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 20 + 4);
std::vector<uint32_t> offsets = {0, 4, 9, 12, 18, 22, 26}; std::vector<uint32_t> offsets = {0, 4, 9, 12, 18, 22, 26};
uint32_t ctr = 0; uint32_t ctr = 0;
@ -86,7 +92,8 @@ TEST_F(MindDataTestStringTensorDE, Basics2) {
TEST_F(MindDataTestStringTensorDE, Empty) { TEST_F(MindDataTestStringTensorDE, Empty) {
std::vector<std::string> strings{"abc", "defg", "", "", "123", ""}; std::vector<std::string> strings{"abc", "defg", "", "", "123", ""};
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(strings, TensorShape({2, 3})); std::shared_ptr<Tensor> t;
Tensor::CreateFromVector(strings, TensorShape({2, 3}), &t);
// abc_defg___123__ // abc_defg___123__
// 0123456789012345 // 0123456789012345
ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 10 + 4); ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 10 + 4);
@ -112,7 +119,9 @@ TEST_F(MindDataTestStringTensorDE, Empty) {
TEST_F(MindDataTestStringTensorDE, SetItem) { TEST_F(MindDataTestStringTensorDE, SetItem) {
std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"}; std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"};
std::shared_ptr<Tensor> t3 = std::make_shared<Tensor>(strings, TensorShape({2, 3})); std::shared_ptr<Tensor> t3;
Tensor::CreateFromVector(strings, TensorShape({2, 3}), &t3);
ASSERT_TRUE(t3->shape() == TensorShape({2, 3})); ASSERT_TRUE(t3->shape() == TensorShape({2, 3}));
t3->SetItemAt({0, 1}, std::string{"xyzz"}); t3->SetItemAt({0, 1}, std::string{"xyzz"});
@ -136,7 +145,8 @@ TEST_F(MindDataTestStringTensorDE, SetItem) {
TEST_F(MindDataTestStringTensorDE, Iterator) { TEST_F(MindDataTestStringTensorDE, Iterator) {
std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"}; std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"};
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(strings, TensorShape({2, 3})); std::shared_ptr<Tensor> t;
Tensor::CreateFromVector(strings, TensorShape({2, 3}), &t);
uint32_t index = 0; uint32_t index = 0;
auto itr = t->begin<std::string_view>(); auto itr = t->begin<std::string_view>();
for (; itr != t->end<std::string_view>(); itr++) { for (; itr != t->end<std::string_view>(); itr++) {
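
One way to read the 6 * 5 + 20 + 4 size asserted in Basics2, using the offsets {0, 4, 9, 12, 18, 22, 26} that the same test checks: consecutive offsets differ by length + 1, so each string is stored null-terminated (20 payload bytes + 6 terminators), and an array of n + 1 = 7 four-byte offsets precedes the character data (28 bytes). Per string that is 4 offset bytes + 1 terminator = 5 bytes of overhead, giving 6 * 5 + 20 + 4 = 54 bytes in total; the Empty test's 6 * 5 + 10 + 4 follows the same formula with 10 payload bytes.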

View File

@ -35,8 +35,9 @@ class MindDataTestTensorDE : public UT::Common {
}; };
TEST_F(MindDataTestTensorDE, Basics) { TEST_F(MindDataTestTensorDE, Basics) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 3}), DataType(DataType::DE_UINT64)); std::shared_ptr<Tensor> t;
ASSERT_TRUE((t->AllocateBuffer(t->SizeInBytes())).IsOk()); Tensor::CreateEmpty(TensorShape({2, 3}), DataType(DataType::DE_UINT64), &t);
ASSERT_EQ(t->shape(), TensorShape({2, 3})); ASSERT_EQ(t->shape(), TensorShape({2, 3}));
ASSERT_EQ(t->type(), DataType::DE_UINT64); ASSERT_EQ(t->type(), DataType::DE_UINT64);
ASSERT_EQ(t->SizeInBytes(), 2 * 3 * 8); ASSERT_EQ(t->SizeInBytes(), 2 * 3 * 8);
@ -67,28 +68,30 @@ TEST_F(MindDataTestTensorDE, Basics) {
ASSERT_EQ(t->ToString(), "Tensor (shape: <2,3>, Type: uint64)\n[[1,2,3],[4,5,6]]"); ASSERT_EQ(t->ToString(), "Tensor (shape: <2,3>, Type: uint64)\n[[1,2,3],[4,5,6]]");
std::vector<uint64_t> x = {1, 2, 3, 4, 5, 6}; std::vector<uint64_t> x = {1, 2, 3, 4, 5, 6};
std::shared_ptr<Tensor> t2; std::shared_ptr<Tensor> t2;
Tensor::CreateTensor(&t2, x, TensorShape({2, 3})); Tensor::CreateFromVector(x, TensorShape({2, 3}), &t2);
ASSERT_EQ(*t == *t2, true); ASSERT_EQ(*t == *t2, true);
ASSERT_EQ(*t != *t2, false); ASSERT_EQ(*t != *t2, false);
} }
TEST_F(MindDataTestTensorDE, Fill) { TEST_F(MindDataTestTensorDE, Fill) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_FLOAT32)); std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_FLOAT32), &t);
t->Fill<float>(2.5); t->Fill<float>(2.5);
std::vector<float> x = {2.5, 2.5, 2.5, 2.5}; std::vector<float> x = {2.5, 2.5, 2.5, 2.5};
std::shared_ptr<Tensor> t2; std::shared_ptr<Tensor> t2;
Tensor::CreateTensor(&t2, x, TensorShape({2, 2})); Tensor::CreateFromVector(x, TensorShape({2, 2}), &t2);
ASSERT_EQ(*t == *t2, true); ASSERT_EQ(*t == *t2, true);
} }
TEST_F(MindDataTestTensorDE, Reshape) { TEST_F(MindDataTestTensorDE, Reshape) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_UINT8)); std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_UINT8), &t);
t->Fill<uint8_t>(254); t->Fill<uint8_t>(254);
t->Reshape(TensorShape({4})); t->Reshape(TensorShape({4}));
std::vector<uint8_t> x = {254, 254, 254, 254}; std::vector<uint8_t> x = {254, 254, 254, 254};
std::shared_ptr<Tensor> t2; std::shared_ptr<Tensor> t2;
Tensor::CreateTensor(&t2, x); Tensor::CreateFromVector(x, &t2);
ASSERT_EQ(*t == *t2, true); ASSERT_EQ(*t == *t2, true);
Status rc = t->Reshape(TensorShape({5})); Status rc = t->Reshape(TensorShape({5}));
@ -102,7 +105,8 @@ TEST_F(MindDataTestTensorDE, Reshape) {
} }
TEST_F(MindDataTestTensorDE, CopyTensor) { TEST_F(MindDataTestTensorDE, CopyTensor) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({}), DataType(DataType::DE_INT16)); std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({}), DataType(DataType::DE_INT16), &t);
t->SetItemAt<int16_t>({}, -66); t->SetItemAt<int16_t>({}, -66);
ASSERT_EQ(t->shape(), TensorShape({})); ASSERT_EQ(t->shape(), TensorShape({}));
ASSERT_EQ(t->type(), DataType::DE_INT16); ASSERT_EQ(t->type(), DataType::DE_INT16);
@ -125,30 +129,31 @@ TEST_F(MindDataTestTensorDE, CopyTensor) {
} }
TEST_F(MindDataTestTensorDE, InsertTensor) { TEST_F(MindDataTestTensorDE, InsertTensor) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 3}), DataType(DataType::DE_FLOAT64)); std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2, 3}), DataType(DataType::DE_FLOAT64), &t);
std::vector<double> x = {1.1, 2.1, 3.1}; std::vector<double> x = {1.1, 2.1, 3.1};
std::shared_ptr<Tensor> t2; std::shared_ptr<Tensor> t2;
Tensor::CreateTensor(&t2, x); Tensor::CreateFromVector(x, &t2);
std::vector<double> y = {1.2, 2.2, 3.2}; std::vector<double> y = {1.2, 2.2, 3.2};
std::shared_ptr<Tensor> t3; std::shared_ptr<Tensor> t3;
Tensor::CreateTensor(&t3, y); Tensor::CreateFromVector(y, &t3);
ASSERT_TRUE(t->InsertTensor({0}, t2).OK()); ASSERT_TRUE(t->InsertTensor({0}, t2).OK());
ASSERT_TRUE(t->InsertTensor({1}, t3).OK()); ASSERT_TRUE(t->InsertTensor({1}, t3).OK());
std::vector<double> z = {1.1, 2.1, 3.1, 1.2, 2.2, 3.2}; std::vector<double> z = {1.1, 2.1, 3.1, 1.2, 2.2, 3.2};
std::shared_ptr<Tensor> t4; std::shared_ptr<Tensor> t4;
Tensor::CreateTensor(&t4, z, TensorShape({2, 3})); Tensor::CreateFromVector(z, TensorShape({2, 3}), &t4);
ASSERT_EQ(*t == *t4, true); ASSERT_EQ(*t == *t4, true);
std::shared_ptr<Tensor> t5; std::shared_ptr<Tensor> t5;
Tensor::CreateTensor<double>(&t5, 0); Tensor::CreateScalar<double>(0, &t5);
ASSERT_TRUE(t->InsertTensor({1, 2}, t5).OK()); ASSERT_TRUE(t->InsertTensor({1, 2}, t5).OK());
z[5] = 0; z[5] = 0;
std::shared_ptr<Tensor> t6; std::shared_ptr<Tensor> t6;
Tensor::CreateTensor(&t6, z, TensorShape({2, 3})); Tensor::CreateFromVector(z, TensorShape({2, 3}), &t6);
ASSERT_EQ(*t == *t6, true); ASSERT_EQ(*t == *t6, true);
ASSERT_EQ(t->InsertTensor({2}, t5).get_code(), StatusCode::kUnexpectedError); ASSERT_EQ(t->InsertTensor({2}, t5).get_code(), StatusCode::kUnexpectedError);
@ -161,7 +166,8 @@ TEST_F(MindDataTestTensorDE, InsertTensor) {
// Test the bug of Tensor::ToString will exec failed for Tensor which store bool values // Test the bug of Tensor::ToString will exec failed for Tensor which store bool values
TEST_F(MindDataTestTensorDE, BoolTensor) { TEST_F(MindDataTestTensorDE, BoolTensor) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2}), DataType(DataType::DE_BOOL)); std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2}), DataType(DataType::DE_BOOL), &t);
t->SetItemAt<bool>({0}, true); t->SetItemAt<bool>({0}, true);
t->SetItemAt<bool>({1}, true); t->SetItemAt<bool>({1}, true);
std::string out = t->ToString(); std::string out = t->ToString();
@ -169,7 +175,8 @@ TEST_F(MindDataTestTensorDE, BoolTensor) {
} }
TEST_F(MindDataTestTensorDE, GetItemAt) { TEST_F(MindDataTestTensorDE, GetItemAt) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_UINT8)); std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_UINT8), &t);
t->Fill<uint8_t>(254); t->Fill<uint8_t>(254);
uint64_t o1; uint64_t o1;
t->GetItemAt<uint64_t>(&o1, {0, 0}); t->GetItemAt<uint64_t>(&o1, {0, 0});
@ -183,7 +190,8 @@ TEST_F(MindDataTestTensorDE, GetItemAt) {
uint8_t o4; uint8_t o4;
t->GetItemAt<uint8_t>(&o4, {1, 1}); t->GetItemAt<uint8_t>(&o4, {1, 1});
ASSERT_EQ(o4, 254); ASSERT_EQ(o4, 254);
std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_INT8)); std::shared_ptr<Tensor> t2;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_INT8), &t2);
t2->Fill<int8_t>(-10); t2->Fill<int8_t>(-10);
int64_t o5; int64_t o5;
t2->GetItemAt<int64_t>(&o5, {0, 0}); t2->GetItemAt<int64_t>(&o5, {0, 0});
@ -197,7 +205,8 @@ TEST_F(MindDataTestTensorDE, GetItemAt) {
int8_t o8; int8_t o8;
t2->GetItemAt<int8_t>(&o8, {1, 1}); t2->GetItemAt<int8_t>(&o8, {1, 1});
ASSERT_EQ(o8, -10); ASSERT_EQ(o8, -10);
std::shared_ptr<Tensor> t3 = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_FLOAT32)); std::shared_ptr<Tensor> t3;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_FLOAT32), &t3);
t3->Fill<float>(1.1); t3->Fill<float>(1.1);
double o9; double o9;
t3->GetItemAt<double>(&o9, {0, 0}); t3->GetItemAt<double>(&o9, {0, 0});
@ -208,9 +217,11 @@ TEST_F(MindDataTestTensorDE, GetItemAt) {
} }
TEST_F(MindDataTestTensorDE, OperatorAssign) { TEST_F(MindDataTestTensorDE, OperatorAssign) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_UINT8)); std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_UINT8), &t);
t->Fill<uint8_t>(1); t->Fill<uint8_t>(1);
std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_UINT8)); std::shared_ptr<Tensor> t2;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_UINT8), &t2);
*t2 = std::move(*t); *t2 = std::move(*t);
uint8_t o; uint8_t o;
t2->GetItemAt(&o, {0, 0}); t2->GetItemAt(&o, {0, 0});
@ -224,18 +235,20 @@ TEST_F(MindDataTestTensorDE, OperatorAssign) {
} }
TEST_F(MindDataTestTensorDE, Strides) { TEST_F(MindDataTestTensorDE, Strides) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({4, 2, 2}), DataType(DataType::DE_UINT8)); std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({4, 2, 2}), DataType(DataType::DE_UINT8), &t);
std::vector<dsize_t> x1 = t->Strides(); std::vector<dsize_t> x1 = t->Strides();
std::vector<dsize_t> x2 = {4, 2, 1}; std::vector<dsize_t> x2 = {4, 2, 1};
ASSERT_EQ(x1, x2); ASSERT_EQ(x1, x2);
t = std::make_shared<Tensor>(TensorShape({4, 2, 2}), DataType(DataType::DE_UINT32)); Tensor::CreateEmpty(TensorShape({4, 2, 2}), DataType(DataType::DE_UINT32), &t);
x1 = t->Strides(); x1 = t->Strides();
x2 = {16, 8, 4}; x2 = {16, 8, 4};
ASSERT_EQ(x1, x2); ASSERT_EQ(x1, x2);
} }
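
The expected values make the convention visible: Strides() is measured in bytes, with stride[i] equal to the element size times the product of the dimensions after i. For shape {4, 2, 2}:

    // uint8  (1 byte per element):  {2*2*1, 2*1, 1} = {4, 2, 1}
    // uint32 (4 bytes per element): {2*2*4, 2*4, 4} = {16, 8, 4}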
void checkCvMat(TensorShape shape, DataType type) { void checkCvMat(TensorShape shape, DataType type) {
std::shared_ptr<CVTensor> t = std::make_shared<CVTensor>(shape, type); std::shared_ptr<CVTensor> t;
CVTensor::CreateEmpty(shape, type, &t);
cv::Mat m = t->mat(); cv::Mat m = t->mat();
ASSERT_EQ(m.data, t->GetBuffer()); ASSERT_EQ(m.data, t->GetBuffer());
ASSERT_EQ(static_cast<uchar>(m.type()) & static_cast<uchar>(CV_MAT_DEPTH_MASK), type.AsCVType()); ASSERT_EQ(static_cast<uchar>(m.type()) & static_cast<uchar>(CV_MAT_DEPTH_MASK), type.AsCVType());
@ -289,8 +302,10 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) {
m.at<uint8_t>(0, 1) = 20; m.at<uint8_t>(0, 1) = 20;
m.at<uint8_t>(1, 0) = 30; m.at<uint8_t>(1, 0) = 30;
m.at<uint8_t>(1, 1) = 40; m.at<uint8_t>(1, 1) = 40;
std::shared_ptr<CVTensor> cvt = std::make_shared<CVTensor>(m); std::shared_ptr<CVTensor> cvt;
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_UINT8)); CVTensor::CreateFromMat(m, &cvt);
std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_UINT8), &t);
t->SetItemAt<uint8_t>({0, 0}, 10); t->SetItemAt<uint8_t>({0, 0}, 10);
t->SetItemAt<uint8_t>({0, 1}, 20); t->SetItemAt<uint8_t>({0, 1}, 20);
t->SetItemAt<uint8_t>({1, 0}, 30); t->SetItemAt<uint8_t>({1, 0}, 30);
@ -302,8 +317,10 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) {
m2.at<uint8_t>(1) = 20; m2.at<uint8_t>(1) = 20;
m2.at<uint8_t>(2) = 30; m2.at<uint8_t>(2) = 30;
m2.at<uint8_t>(3) = 40; m2.at<uint8_t>(3) = 40;
std::shared_ptr<CVTensor> cvt2 = std::make_shared<CVTensor>(m2); std::shared_ptr<CVTensor> cvt2;
std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(TensorShape({4}), DataType(DataType::DE_UINT8)); CVTensor::CreateFromMat(m2, &cvt2);
std::shared_ptr<Tensor> t2;
Tensor::CreateEmpty(TensorShape({4}), DataType(DataType::DE_UINT8), &t2);
t2->SetItemAt<uint8_t>({0}, 10); t2->SetItemAt<uint8_t>({0}, 10);
t2->SetItemAt<uint8_t>({1}, 20); t2->SetItemAt<uint8_t>({1}, 20);
t2->SetItemAt<uint8_t>({2}, 30); t2->SetItemAt<uint8_t>({2}, 30);
@ -313,10 +330,12 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) {
} }
TEST_F(MindDataTestTensorDE, CVTensorAs) { TEST_F(MindDataTestTensorDE, CVTensorAs) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64)); std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64), &t);
t->Fill<double>(2.2); t->Fill<double>(2.2);
const unsigned char *addr = t->GetBuffer(); const unsigned char *addr = t->GetBuffer();
std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64)); std::shared_ptr<Tensor> t2;
Tensor::CreateEmpty(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64), &t2);
t2->Fill<double>(4.4); t2->Fill<double>(4.4);
std::shared_ptr<CVTensor> ctv = CVTensor::AsCVTensor(t); std::shared_ptr<CVTensor> ctv = CVTensor::AsCVTensor(t);
ASSERT_EQ(t->GetBuffer(), nullptr); ASSERT_EQ(t->GetBuffer(), nullptr);
@ -326,6 +345,10 @@ TEST_F(MindDataTestTensorDE, CVTensorAs) {
ASSERT_EQ(ctv->GetBuffer(), addr); ASSERT_EQ(ctv->GetBuffer(), addr);
ASSERT_TRUE(*t2 == *ctv); ASSERT_TRUE(*t2 == *ctv);
MS_LOG(DEBUG) << *t2 << std::endl << *ctv; MS_LOG(DEBUG) << *t2 << std::endl << *ctv;
cv::Mat m2 = ctv->matCopy();
m2 = 2 * m2;
ASSERT_EQ(ctv->GetBuffer(), addr);
ASSERT_TRUE(*t2 == *ctv);
} }
TEST_F(MindDataTestTensorDE, CVTensorMatSlice) { TEST_F(MindDataTestTensorDE, CVTensorMatSlice) {
@ -336,23 +359,26 @@ TEST_F(MindDataTestTensorDE, CVTensorMatSlice) {
m.at<int32_t>(1, 0) = 40; m.at<int32_t>(1, 0) = 40;
m.at<int32_t>(1, 1) = 50; m.at<int32_t>(1, 1) = 50;
m.at<int32_t>(1, 2) = 60; m.at<int32_t>(1, 2) = 60;
std::shared_ptr<CVTensor> cvt = std::make_shared<CVTensor>(m); std::shared_ptr<CVTensor> cvt;
CVTensor::CreateFromMat(m, &cvt);
cv::Mat mat; cv::Mat mat;
cvt->Mat({1}, &mat); cvt->MatAtIndex({1}, &mat);
cv::Mat m2(3, 1, CV_32S); cv::Mat m2(3, 1, CV_32S);
m2.at<int32_t>(0) = 40; m2.at<int32_t>(0) = 40;
m2.at<int32_t>(1) = 50; m2.at<int32_t>(1) = 50;
m2.at<int32_t>(2) = 60; m2.at<int32_t>(2) = 60;
std::shared_ptr<CVTensor> cvt2 = std::make_shared<CVTensor>(mat); std::shared_ptr<CVTensor> cvt2;
std::shared_ptr<CVTensor> cvt3 = std::make_shared<CVTensor>(m2); CVTensor::CreateFromMat(mat, &cvt2);
std::shared_ptr<CVTensor> cvt3;
CVTensor::CreateFromMat(m2, &cvt3);
ASSERT_TRUE(*cvt2 == *cvt3); ASSERT_TRUE(*cvt2 == *cvt3);
cvt->Mat({0}, &mat); cvt->MatAtIndex({0}, &mat);
m2.at<int32_t>(0) = 10; m2.at<int32_t>(0) = 10;
m2.at<int32_t>(1) = 20; m2.at<int32_t>(1) = 20;
m2.at<int32_t>(2) = 30; m2.at<int32_t>(2) = 30;
cvt2 = std::make_shared<CVTensor>(mat); CVTensor::CreateFromMat(mat, &cvt2);
cvt3 = std::make_shared<CVTensor>(m2); CVTensor::CreateFromMat(m2, &cvt3);
ASSERT_TRUE(*cvt2 == *cvt3); ASSERT_TRUE(*cvt2 == *cvt3);
} }
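
CVTensor moves to the same factory style as Tensor: CreateEmpty and CreateFromMat replace the constructors, and the slicing accessor is renamed from Mat() to MatAtIndex(). A minimal sketch (whether MatAtIndex yields a view or a copy of the underlying buffer is not shown in this diff):

    cv::Mat m(2, 3, CV_32S);            // source matrix, filled elsewhere
    std::shared_ptr<CVTensor> cvt;
    CVTensor::CreateFromMat(m, &cvt);   // was: std::make_shared<CVTensor>(m)
    cv::Mat row;
    cvt->MatAtIndex({1}, &row);         // was: cvt->Mat({1}, &row); selects index 1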
@ -361,7 +387,7 @@ TEST_F(MindDataTestTensorDE, TensorIterator) {
std::vector<uint32_t> values2 = {2, 3, 4, 5, 6, 7}; std::vector<uint32_t> values2 = {2, 3, 4, 5, 6, 7};
std::shared_ptr<Tensor> t; std::shared_ptr<Tensor> t;
Tensor::CreateTensor(&t, values); Tensor::CreateFromVector(values, &t);
auto i = t->begin<uint32_t>(); auto i = t->begin<uint32_t>();
auto j = values.begin(); auto j = values.begin();
@ -395,11 +421,11 @@ TEST_F(MindDataTestTensorDE, TensorIterator) {
TEST_F(MindDataTestTensorDE, TensorSlice) { TEST_F(MindDataTestTensorDE, TensorSlice) {
std::shared_ptr<Tensor> t; std::shared_ptr<Tensor> t;
Tensor::CreateTensor(&t, std::vector<dsize_t>{0, 1, 2, 3, 4}); Tensor::CreateFromVector(std::vector<dsize_t>{0, 1, 2, 3, 4}, &t);
std::shared_ptr<Tensor> t2; std::shared_ptr<Tensor> t2;
auto x = std::vector<dsize_t>{0, 3, 4}; auto x = std::vector<dsize_t>{0, 3, 4};
std::shared_ptr<Tensor> expected; std::shared_ptr<Tensor> expected;
Tensor::CreateTensor(&expected, x); Tensor::CreateFromVector(x, &expected);
t->Slice(&t2, x); t->Slice(&t2, x);
ASSERT_EQ(*t2, *expected); ASSERT_EQ(*t2, *expected);
t->Slice(&t2, std::vector<dsize_t>{0, 1, 2, 3, 4}); t->Slice(&t2, std::vector<dsize_t>{0, 1, 2, 3, 4});
@ -412,13 +438,13 @@ TEST_F(MindDataTestTensorDE, TensorConcatenate) {
std::vector<uint32_t> expected = {1, 2, 3, 4, 5, 6}; std::vector<uint32_t> expected = {1, 2, 3, 4, 5, 6};
std::shared_ptr<Tensor> t1; std::shared_ptr<Tensor> t1;
Tensor::CreateTensor(&t1, values1); Tensor::CreateFromVector(values1, &t1);
std::shared_ptr<Tensor> t2; std::shared_ptr<Tensor> t2;
Tensor::CreateTensor(&t2, values2); Tensor::CreateFromVector(values2, &t2);
std::shared_ptr<Tensor> out; std::shared_ptr<Tensor> out;
Tensor::CreateTensor(&out, expected); Tensor::CreateFromVector(expected, &out);
Status s = t1->Concatenate({3}, t2); Status s = t1->Concatenate({3}, t2);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
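
Reading the expected vector above, Concatenate({3}, t2) writes t2's elements into t1 in place starting at flat index 3, so a t1 of {1, 2, 3, _, _, _} and a t2 of {4, 5, 6} would yield {1, 2, 3, 4, 5, 6}; t2's actual contents lie outside this hunk, so {4, 5, 6} is an inference from the expected output, not something the excerpt shows.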
@ -434,15 +460,80 @@ TEST_F(MindDataTestTensorDE, TensorConcatenate) {
} }
TEST_F(MindDataTestTensorDE, TensorEmpty) { TEST_F(MindDataTestTensorDE, TensorEmpty) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 3}), DataType(DataType::DE_UINT64)); TensorPtr t;
ASSERT_TRUE(t->HasData()); Status rc = Tensor::CreateEmpty(TensorShape({0}), DataType(DataType::DE_UINT64), &t);
} ASSERT_TRUE(rc.IsOk());
TEST_F(MindDataTestTensorDE, TensorEmptyInvalidate) { ASSERT_EQ(t->shape(), TensorShape({0}));
std::vector<uint32_t> values1 = {1, 2, 3, 0, 0, 0}; ASSERT_EQ(t->type(), DataType::DE_UINT64);
std::shared_ptr<Tensor> t; ASSERT_EQ(t->SizeInBytes(), 0);
Tensor::CreateTensor(&t, values1); ASSERT_EQ(t->GetBuffer(), nullptr);
t->Invalidate(); ASSERT_TRUE(!t->HasData());
ASSERT_TRUE(t->HasData());
}
rc = t->SetItemAt<uint64_t>({0}, 7);
ASSERT_TRUE(rc.IsError());
rc = Tensor::CreateEmpty(TensorShape({1, 0}), DataType(DataType::DE_STRING), &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({1, 0}));
ASSERT_EQ(t->type(), DataType::DE_STRING);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
ASSERT_TRUE(!t->HasData());
std::vector<uint16_t> data;
rc = Tensor::CreateFromVector(data, &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({0}));
ASSERT_EQ(t->type(), DataType::DE_UINT16);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
ASSERT_TRUE(!t->HasData());
std::vector<std::string> data2;
rc = Tensor::CreateFromVector(data2, &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({0}));
ASSERT_EQ(t->type(), DataType::DE_STRING);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
ASSERT_TRUE(!t->HasData());
rc = Tensor::CreateFromVector(data, TensorShape({0, 2}), &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({0, 2}));
ASSERT_EQ(t->type(), DataType::DE_UINT16);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
ASSERT_TRUE(!t->HasData());
rc = Tensor::CreateFromVector(data2, TensorShape({0, 0, 6}), &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({0, 0, 6}));
ASSERT_EQ(t->type(), DataType::DE_STRING);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
ASSERT_TRUE(!t->HasData());
rc = Tensor::CreateFromMemory(TensorShape({0}), DataType(DataType::DE_INT8), nullptr, &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({0}));
ASSERT_EQ(t->type(), DataType::DE_INT8);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
ASSERT_TRUE(!t->HasData());
rc = Tensor::CreateFromMemory(TensorShape({0}), DataType(DataType::DE_STRING), nullptr, &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({0}));
ASSERT_EQ(t->type(), DataType::DE_STRING);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
std::vector<uint32_t> values = {1, 2, 3, 0, 0, 0};
std::shared_ptr<Tensor> t2;
Tensor::CreateFromVector(values, &t2);
ASSERT_TRUE(t2->HasData());
t2->Invalidate();
ASSERT_TRUE(!t2->HasData());
}
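
Taken together, the rewritten test pins down the empty-tensor contract: any zero-element shape — via CreateEmpty, CreateFromVector on an empty vector (with or without an explicit shape), or CreateFromMemory with nullptr — produces a tensor with the requested shape and type but SizeInBytes() == 0, GetBuffer() == nullptr, and HasData() false; writing into it fails with an error Status, and Invalidate() moves a populated tensor into the same no-data state. In brief:

    TensorPtr t;
    Status rc = Tensor::CreateEmpty(TensorShape({0}), DataType(DataType::DE_UINT64), &t);
    // rc.IsOk(), yet t->GetBuffer() == nullptr, !t->HasData(),
    // and t->SetItemAt<uint64_t>({0}, 7) returns an error Status.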

View File

@ -46,8 +46,8 @@ class MindDataTestTokenizerOp : public UT::Common {
TEST_F(MindDataTestTokenizerOp, TestUnicodeCharTokenizerOp) { TEST_F(MindDataTestTokenizerOp, TestUnicodeCharTokenizerOp) {
MS_LOG(INFO) << "Doing TestUnicodeCharTokenizerOp."; MS_LOG(INFO) << "Doing TestUnicodeCharTokenizerOp.";
std::unique_ptr<UnicodeCharTokenizerOp> op(new UnicodeCharTokenizerOp(true)); std::unique_ptr<UnicodeCharTokenizerOp> op(new UnicodeCharTokenizerOp(true));
std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Hello World!"); std::shared_ptr<Tensor> input;
TensorRow output; Tensor::CreateScalar<std::string>("Hello World!", &input); TensorRow output;
Status s = op->Compute(TensorRow(0, {input}), &output); Status s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 12); EXPECT_EQ(output[0]->Size(), 12);
@ -66,7 +66,7 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeCharTokenizerOp) {
CheckEqual(output[0], {10}, "d"); CheckEqual(output[0], {10}, "d");
CheckEqual(output[0], {11}, "!"); CheckEqual(output[0], {11}, "!");
input = std::make_shared<Tensor>("中国 你好!"); Tensor::CreateScalar<std::string>("中国 你好!", &input);
output.clear(); output.clear();
s = op->Compute(TensorRow(0, {input}), &output); s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
@ -80,38 +80,38 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeCharTokenizerOp) {
CheckEqual(output[0], {4}, ""); CheckEqual(output[0], {4}, "");
CheckEqual(output[0], {5}, "!"); CheckEqual(output[0], {5}, "!");
input = std::make_shared<Tensor>(""); Tensor::CreateScalar<std::string>("", &input);
output.clear(); output.clear();
s = op->Compute(TensorRow(0, {input}), &output); s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1); EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1); EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString(); MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString();
CheckEqual(output[0], {0}, ""); CheckEqual(output[0], {0}, "");
input = std::make_shared<Tensor>("H"); Tensor::CreateScalar<std::string>("H", &input);
output.clear(); output.clear();
s = op->Compute(TensorRow(0, {input}), &output); s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1); EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1); EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor4: " << output[0]->ToString(); MS_LOG(INFO) << "Out tensor4: " << output[0]->ToString();
CheckEqual(output[0], {0}, "H"); CheckEqual(output[0], {0}, "H");
input = std::make_shared<Tensor>(" "); Tensor::CreateScalar<std::string>(" ", &input);
output.clear(); output.clear();
s = op->Compute(TensorRow(0, {input}), &output); s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 2); EXPECT_EQ(output[0]->Size(), 2);
EXPECT_EQ(output[0]->Rank(), 1); EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString(); MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString();
CheckEqual(output[0], {0}, " "); CheckEqual(output[0], {0}, " ");
CheckEqual(output[0], {1}, " "); CheckEqual(output[0], {1}, " ");
input = std::make_shared<Tensor>(""); Tensor::CreateScalar<std::string>("", &input);
output.clear(); output.clear();
s = op->Compute(TensorRow(0, {input}), &output); s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1); EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1); EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor6: " << output[0]->ToString(); MS_LOG(INFO) << "Out tensor6: " << output[0]->ToString();
@ -121,10 +121,10 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeCharTokenizerOp) {
TEST_F(MindDataTestTokenizerOp, TestWhitespaceTokenizerOp) { TEST_F(MindDataTestTokenizerOp, TestWhitespaceTokenizerOp) {
MS_LOG(INFO) << "Doing TestWhitespaceTokenizerOp."; MS_LOG(INFO) << "Doing TestWhitespaceTokenizerOp.";
std::unique_ptr<WhitespaceTokenizerOp> op(new WhitespaceTokenizerOp(true)); std::unique_ptr<WhitespaceTokenizerOp> op(new WhitespaceTokenizerOp(true));
std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Welcome to China."); std::shared_ptr<Tensor> input;
TensorRow output; Tensor::CreateScalar<std::string>("Welcome to China.", &input); TensorRow output;
Status s = op->Compute(TensorRow(0, {input}), &output); Status s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 3); EXPECT_EQ(output[0]->Size(), 3);
EXPECT_EQ(output[0]->Rank(), 1); EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor1: " << output[0]->ToString(); MS_LOG(INFO) << "Out tensor1: " << output[0]->ToString();
@ -132,37 +132,37 @@ TEST_F(MindDataTestTokenizerOp, TestWhitespaceTokenizerOp) {
CheckEqual(output[0], {1}, "to"); CheckEqual(output[0], {1}, "to");
CheckEqual(output[0], {2}, "China."); CheckEqual(output[0], {2}, "China.");
input = std::make_shared<Tensor>(" hello"); Tensor::CreateScalar<std::string>(" hello", &input);
output.clear(); output.clear();
s = op->Compute(TensorRow(0, {input}), &output); s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1); EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1); EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor2: " << output[0]->ToString(); MS_LOG(INFO) << "Out tensor2: " << output[0]->ToString();
CheckEqual(output[0], {0}, "hello"); CheckEqual(output[0], {0}, "hello");
input = std::make_shared<Tensor>("hello"); Tensor::CreateScalar<std::string>("hello", &input);
output.clear(); output.clear();
s = op->Compute(TensorRow(0, {input}), &output); s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1); EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1); EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString(); MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString();
CheckEqual(output[0], {0}, "hello"); CheckEqual(output[0], {0}, "hello");
input = std::make_shared<Tensor>("hello "); Tensor::CreateScalar<std::string>("hello ", &input);
output.clear(); output.clear();
s = op->Compute(TensorRow(0, {input}), &output); s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1); EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1); EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor4: " << output[0]->ToString(); MS_LOG(INFO) << "Out tensor4: " << output[0]->ToString();
CheckEqual(output[0], {0}, "hello"); CheckEqual(output[0], {0}, "hello");
input = std::make_shared<Tensor>(" "); Tensor::CreateScalar<std::string>(" ", &input);
output.clear(); output.clear();
s = op->Compute(TensorRow(0, {input}), &output); s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1); EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1); EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString(); MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString();
@ -174,8 +174,9 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) {
std::unique_ptr<UnicodeScriptTokenizerOp> keep_whitespace_op(new UnicodeScriptTokenizerOp(true, true)); std::unique_ptr<UnicodeScriptTokenizerOp> keep_whitespace_op(new UnicodeScriptTokenizerOp(true, true));
std::unique_ptr<UnicodeScriptTokenizerOp> skip_whitespace_op(new UnicodeScriptTokenizerOp(false, true)); std::unique_ptr<UnicodeScriptTokenizerOp> skip_whitespace_op(new UnicodeScriptTokenizerOp(false, true));
std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Welcome to China. \n 中国\t北京"); std::shared_ptr<Tensor> input;
TensorRow output; Tensor::CreateScalar<std::string>("Welcome to China. \n 中国\t北京", &input);
TensorRow output;
Status s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); Status s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk()); EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 10); EXPECT_EQ(output[0]->Size(), 10);
@ -204,10 +205,9 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) {
CheckEqual(output[0], {4}, "中国"); CheckEqual(output[0], {4}, "中国");
CheckEqual(output[0], {5}, "北京"); CheckEqual(output[0], {5}, "北京");
input = std::make_shared<Tensor>(" Welcome to 中国. "); Tensor::CreateScalar<std::string>(" Welcome to 中国. ", &input);
output.clear(); output.clear();
s = skip_whitespace_op->Compute(TensorRow(0, {input}), &output); s = skip_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk());
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 4); EXPECT_EQ(output[0]->Size(), 4);
EXPECT_EQ(output[0]->Rank(), 1); EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString(); MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString();
@@ -230,25 +230,23 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) {
CheckEqual(output[0], {6}, ".");
CheckEqual(output[0], {7}, " ");
- input = std::make_shared<Tensor>("Hello");
+ Tensor::CreateScalar<std::string>("Hello", &input);
output.clear();
- s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
- EXPECT_TRUE(s.IsOk());
+ s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString();
CheckEqual(output[0], {0}, "Hello");
- input = std::make_shared<Tensor>("H");
+ Tensor::CreateScalar<std::string>("H", &input);
output.clear();
- s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
- EXPECT_TRUE(s.IsOk());
+ s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor6: " << output[0]->ToString();
CheckEqual(output[0], {0}, "H");
- input = std::make_shared<Tensor>("");
+ Tensor::CreateScalar<std::string>("", &input);
output.clear();
s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
@@ -257,10 +255,9 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) {
MS_LOG(INFO) << "Out tensor7: " << output[0]->ToString();
CheckEqual(output[0], {0}, "");
- input = std::make_shared<Tensor>("Hello中国Hello世界");
+ Tensor::CreateScalar<std::string>("Hello中国Hello世界", &input);
output.clear();
- s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk());
- EXPECT_EQ(output[0]->Size(), 4);
+ s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output); EXPECT_TRUE(s.IsOk()); EXPECT_EQ(output[0]->Size(), 4);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor8: " << output[0]->ToString();
CheckEqual(output[0], {0}, "Hello");
@@ -268,15 +265,15 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) {
CheckEqual(output[0], {2}, "Hello");
CheckEqual(output[0], {3}, "世界");
- input = std::make_shared<Tensor>(" ");
+ Tensor::CreateScalar<std::string>(" ", &input);
output.clear();
s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor10: " << output[0]->ToString();
CheckEqual(output[0], {0}, " ");
- input = std::make_shared<Tensor>(" ");
+ Tensor::CreateScalar<std::string>(" ", &input);
output.clear();
s = skip_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
@@ -289,7 +286,9 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) {
TEST_F(MindDataTestTokenizerOp, TestCaseFold) {
MS_LOG(INFO) << "Doing TestCaseFold.";
std::unique_ptr<CaseFoldOp> case_fold_op(new CaseFoldOp());
- std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Welcome to China. \n 中国\t北京");
+ std::shared_ptr<Tensor> input;
+ Tensor::CreateScalar<std::string>("Welcome to China. \n 中国\t北京", &input);
std::shared_ptr<Tensor> output;
Status s = case_fold_op->Compute(input, &output);
EXPECT_TRUE(s.IsOk());
@@ -305,7 +304,8 @@ TEST_F(MindDataTestTokenizerOp, TestNormalize) {
std::unique_ptr<NormalizeUTF8Op> nfkc_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfkc));
std::unique_ptr<NormalizeUTF8Op> nfd_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfd));
std::unique_ptr<NormalizeUTF8Op> nfkd_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfkd));
- std::shared_ptr<Tensor> input = std::make_shared<Tensor>("");
+ std::shared_ptr<Tensor> input;
+ Tensor::CreateScalar<std::string>("", &input);
std::shared_ptr<Tensor> output;
Status s = nfc_normalize_op->Compute(input, &output);
EXPECT_TRUE(s.IsOk());
@@ -327,7 +327,8 @@ TEST_F(MindDataTestTokenizerOp, TestNormalize) {
TEST_F(MindDataTestTokenizerOp, TestRegexReplace) {
MS_LOG(INFO) << "Doing TestRegexReplace.";
std::unique_ptr<RegexReplaceOp> regex_replace_op(new RegexReplaceOp("\\s+", "_", true));
- std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Welcome to China. \n 中国\t北京");
+ std::shared_ptr<Tensor> input;
+ Tensor::CreateScalar<std::string>("Welcome to China. \n 中国\t北京", &input);
std::shared_ptr<Tensor> output;
Status s = regex_replace_op->Compute(input, &output);
EXPECT_TRUE(s.IsOk());
@@ -340,19 +341,20 @@ TEST_F(MindDataTestTokenizerOp, TestRegexReplace) {
TEST_F(MindDataTestTokenizerOp, TestRegexTokenizer) {
MS_LOG(INFO) << "Doing TestRegexTokenizerOp.";
std::unique_ptr<RegexTokenizerOp> regex_tokenizer_op(new RegexTokenizerOp("\\p{Cc}|\\p{Cf}|\\s+", "", true));
- std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Welcome to China. \n 中国\t北京");
+ std::shared_ptr<Tensor> input;
+ Tensor::CreateScalar<std::string>("Welcome to China. \n 中国\t北京", &input);
TensorRow output;
Status s = regex_tokenizer_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
}
TEST_F(MindDataTestTokenizerOp, TestBasicTokenizer) {
MS_LOG(INFO) << "Doing TestBasicTokenizer.";
- //bool lower_case, bool keep_whitespace,
+ // bool lower_case, bool keep_whitespace,
// NormalizeForm normalization_form, bool preserve_unused_token
- std::unique_ptr<BasicTokenizerOp> basic_tokenizer(new BasicTokenizerOp(true, true, NormalizeForm::kNone, false,
-                                                                        true));
- std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Welcome to China. 中国\t北京");
+ std::unique_ptr<BasicTokenizerOp> basic_tokenizer(new BasicTokenizerOp(true, true, NormalizeForm::kNone, false, true));
+ std::shared_ptr<Tensor> input;
+ Tensor::CreateScalar<std::string>("Welcome to China. 中国\t北京", &input);
TensorRow output;
Status s = basic_tokenizer->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
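Every tokenizer change above is the same migration: the removed std::make_shared<Tensor>(std::string) constructor becomes a call to the Tensor::CreateScalar factory introduced by this cleanup. A minimal sketch of the new pattern, assuming the factory returns a Status like the other Create* helpers in this change (headers and test fixture omitted):

std::shared_ptr<Tensor> input;
// The factory fills `input` and reports failure through a Status, instead of
// leaving error handling to a constructor that has no way to signal it.
Status rc = Tensor::CreateScalar<std::string>("Welcome to China.", &input);
EXPECT_TRUE(rc.IsOk());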
View File
@@ -35,17 +35,17 @@ class MindDataTestTruncatePairOp : public UT::Common {
TEST_F(MindDataTestTruncatePairOp, Basics) {
std::shared_ptr<Tensor> t1;
- Tensor::CreateTensor(&t1, std::vector<uint32_t>({1, 2, 3}));
+ Tensor::CreateFromVector(std::vector<uint32_t>({1, 2, 3}), &t1);
std::shared_ptr<Tensor> t2;
- Tensor::CreateTensor(&t2, std::vector<uint32_t>({4, 5}));
+ Tensor::CreateFromVector(std::vector<uint32_t>({4, 5}), &t2);
TensorRow in({t1, t2});
std::shared_ptr<TruncateSequencePairOp> op = std::make_shared<TruncateSequencePairOp>(4);
TensorRow out;
ASSERT_TRUE(op->Compute(in, &out).IsOk());
std::shared_ptr<Tensor> out1;
- Tensor::CreateTensor(&out1, std::vector<uint32_t>({1, 2}));
+ Tensor::CreateFromVector(std::vector<uint32_t>({1, 2}), &out1);
std::shared_ptr<Tensor> out2;
- Tensor::CreateTensor(&out2, std::vector<uint32_t>({4, 5}));
+ Tensor::CreateFromVector(std::vector<uint32_t>({4, 5}), &out2);
ASSERT_EQ(*out1, *out[0]);
ASSERT_EQ(*out2, *out[1]);
}
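The TruncateSequencePair test shows the vector counterpart of the same migration: Tensor::CreateTensor(&t, vec) becomes Tensor::CreateFromVector(vec, &t), with the output pointer moved to the last parameter. A sketch under the same assumptions as above:

std::shared_ptr<Tensor> ids;
// Both the DataType (uint32) and the shape ({3}) are deduced from the vector.
Tensor::CreateFromVector(std::vector<uint32_t>({1, 2, 3}), &ids);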
View File
@@ -43,16 +43,15 @@ class MindDataTestTypeCast : public UT::Common {
template<typename FROM, typename TO>
void testCast(std::vector<FROM> values, const DataType &from, const DataType &to) {
- std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({static_cast<int64_t>(values.size())}),
-                                                      DataType(from),
-                                                      reinterpret_cast<unsigned char *>(&values[0]));
+ std::shared_ptr<Tensor> t;
+ Tensor::CreateFromVector(values, &t);
std::unique_ptr<TypeCastOp> op(new TypeCastOp(to));
EXPECT_TRUE(op->OneToOne());
std::shared_ptr<Tensor> output;
EXPECT_TRUE(op->Compute(t, &output));
ASSERT_TRUE(t->shape() == output->shape());
- ASSERT_TRUE(DataType(to)==output->type());
+ ASSERT_TRUE(DataType(to) == output->type());
MS_LOG(DEBUG) << *output << std::endl;
auto out = output->begin<TO>();
auto v = values.begin();
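In testCast the rewrite removes real risk, not just noise: the old constructor took a hand-built TensorShape, an explicit DataType, and a reinterpret_cast'ed raw buffer, all of which had to agree with each other, while CreateFromVector derives shape and type from the typed vector itself. A hedged sketch (MakeTestInput is a hypothetical helper, not part of the test file):

template <typename FROM>
std::shared_ptr<Tensor> MakeTestInput(std::vector<FROM> values) {
  std::shared_ptr<Tensor> t;
  // The element type FROM selects the DataType and values.size() fixes the
  // 1-D shape, so the three can no longer drift out of sync.
  Tensor::CreateFromVector(values, &t);
  return t;
}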
View File
@@ -16,7 +16,6 @@
Testing Mask op in DE
"""
import numpy as np
-import pytest
import mindspore.dataset as ds
import mindspore.dataset.text as text
@@ -55,9 +54,7 @@ def test_basics_str():
def test_exceptions():
-    with pytest.raises(RuntimeError) as info:
-        compare(in1=[1, 2, 3, 4], in2=[5, 6, 7, 8], length=1, out1=[1, 2], out2=[5])
-    assert "Indices are empty, generated tensor would be empty" in str(info.value)
+    compare(in1=[1, 2, 3, 4], in2=[5, 6, 7, 8], length=1, out1=[1], out2=[])
if __name__ == "__main__":
View File
@@ -121,21 +121,10 @@ def test_slice_exceptions():
        slice_compare([1, 2, 3, 4, 5], 5)
    assert "Index 5 is out of bounds [0,5)" in str(info.value)
-    with pytest.raises(RuntimeError) as info:
-        slice_compare([1, 2, 3, 4, 5], slice(0))
-    assert "Indices are empty, generated tensor would be empty." in str(info.value)
-    with pytest.raises(RuntimeError) as info:
-        slice_compare([1, 2, 3, 4, 5], slice(3, 1, 1))
-    assert "Indices are empty, generated tensor would be empty." in str(info.value)
-    with pytest.raises(RuntimeError) as info:
-        slice_compare([1, 2, 3, 4, 5], slice(5, 10, 1))
-    assert "Indices are empty, generated tensor would be empty." in str(info.value)
-    with pytest.raises(RuntimeError) as info:
-        slice_compare([1, 2, 3, 4, 5], slice(-1, -5, 1))
-    assert "Indices are empty, generated tensor would be empty." in str(info.value)
+    slice_compare([1, 2, 3, 4, 5], slice(0))
+    slice_compare([1, 2, 3, 4, 5], slice(3, 1, 1))
+    slice_compare([1, 2, 3, 4, 5], slice(5, 10, 1))
+    slice_compare([1, 2, 3, 4, 5], slice(-1, -5, 1))
def test_slice_all_str():
@@ -198,21 +187,10 @@ def test_slice_exceptions_str():
        slice_compare([b"1", b"2", b"3", b"4", b"5"], 5)
    assert "Index 5 is out of bounds [0,5)" in str(info.value)
-    with pytest.raises(RuntimeError) as info:
-        slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0))
-    assert "Indices are empty, generated tensor would be empty." in str(info.value)
-    with pytest.raises(RuntimeError) as info:
-        slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(3, 1, 1))
-    assert "Indices are empty, generated tensor would be empty." in str(info.value)
-    with pytest.raises(RuntimeError) as info:
-        slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(5, 10, 1))
-    assert "Indices are empty, generated tensor would be empty." in str(info.value)
-    with pytest.raises(RuntimeError) as info:
-        slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1, -5, 1))
-    assert "Indices are empty, generated tensor would be empty." in str(info.value)
+    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0))
+    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(3, 1, 1))
+    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(5, 10, 1))
+    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1, -5, 1))
if __name__ == "__main__":
View File
@@ -0,0 +1,72 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np
import mindspore.dataset as ds

def test_tensor_empty():
    def gen():
        for _ in range(4):
            (yield np.array([], dtype=np.int64), np.array([], dtype='S').reshape([0, 4]),
             np.array([1], dtype=np.float64))

    data = ds.GeneratorDataset(gen, column_names=["col1", "col2", "col3"])

    for d in data:
        np.testing.assert_array_equal(np.array([], dtype=np.int64), d[0])
        np.testing.assert_array_equal(np.array([], dtype='S').reshape([0, 4]), d[1])
        np.testing.assert_array_equal(np.array([1], dtype=np.float64), d[2])


def test_tensor_empty_map():
    def gen():
        for _ in range(4):
            (yield np.array([], dtype=np.int64), np.array([], dtype='S'), np.array([1], dtype=np.float64))

    data = ds.GeneratorDataset(gen, column_names=["col1", "col2", "col3"])

    def func(x, y, z):
        x = np.array([1], dtype=np.int64)
        y = np.array(["Hi"], dtype='S')
        z = np.array([], dtype=np.float64)
        return x, y, z

    data = data.map(input_columns=["col1", "col2", "col3"], operations=func)

    for d in data:
        np.testing.assert_array_equal(np.array([1], dtype=np.int64), d[0])
        np.testing.assert_array_equal(np.array(["Hi"], dtype='S'), d[1])
        np.testing.assert_array_equal(np.array([], dtype=np.float64), d[2])


def test_tensor_empty_batch():
    def gen():
        for _ in range(4):
            (yield np.array([], dtype=np.int64), np.array([], dtype='S').reshape([0, 4]),
             np.array([1], dtype=np.float64))

    data = ds.GeneratorDataset(gen, column_names=["col1", "col2", "col3"]).batch(2)

    for d in data:
        np.testing.assert_array_equal(np.array([], dtype=np.int64).reshape([2, 0]), d[0])
        np.testing.assert_array_equal(np.array([], dtype='S').reshape([2, 0, 4]), d[1])
        np.testing.assert_array_equal(np.array([[1], [1]], dtype=np.float64), d[2])


if __name__ == '__main__':
    test_tensor_empty()
    test_tensor_empty_map()
    test_tensor_empty_batch()
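The new test file above exercises zero-element tensors end to end through GeneratorDataset, map, and batch, which the reworked Tensor class now supports. On the C++ side, the analogous construction would presumably go through the CreateEmpty factory from this same change; a sketch, assuming it takes a shape, a type, and an output pointer like the other factories:

std::shared_ptr<Tensor> empty;
// A 0x4 string tensor: zero elements but a fully defined shape and type,
// mirroring np.array([], dtype='S').reshape([0, 4]) in the tests above.
Status rc = Tensor::CreateEmpty(TensorShape({0, 4}), DataType(DataType::DE_STRING), &empty);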