- Add checks and testing for empty tensors
- Cleanup work on createTensor and Tensor's constructors
This commit is contained in: commit adfbc891d3 (parent d8fcf269d6)
@@ -511,8 +511,9 @@ Status DEPipeline::FetchDataFromTensorRow(const TensorRow &row,
       RETURN_IF_NOT_OK(s);
       if (data != nullptr) (*row_raw_data)[column_name] = std::move(*data);
     } else if (column_type == DataType::DE_STRING) {
-      auto buffer = tensor->GetStringsBuffer();
-      std::string ss(reinterpret_cast<const char *>(buffer));  // assume scalar string tensor
+      std::string_view sv;
+      RETURN_IF_NOT_OK(tensor->GetItemAt(&sv, {0}));  // assume scalar string tensor
+      std::string ss(sv);
       (*row_raw_data)[column_name] = std::move(ss);
       continue;
     } else {
@@ -1678,13 +1679,13 @@ Status DEPipeline::ParsePadInfo(py::handle value, PadInfo *pad_info) {
     if (py::isinstance<py::str>(tp[1])) {
       std::string pad_val_string = tp[1].is_none() ? "" : ToString(tp[1]);
       CHECK_FAIL_RETURN_UNEXPECTED(
-        Tensor::CreateTensor(&pad_val, std::vector<std::string>{pad_val_string}, TensorShape::CreateScalar()),
+        Tensor::CreateFromVector(std::vector<std::string>{pad_val_string}, TensorShape::CreateScalar(), &pad_val),
         "Cannot create pad_value Tensor");
     } else {
       float pad_val_float = tp[1].is_none() ? 0 : ToFloat(tp[1]);
-      CHECK_FAIL_RETURN_UNEXPECTED(Tensor::CreateTensor(&pad_val, TensorImpl::kFlexible, TensorShape::CreateScalar(),
-                                                        DataType(DataType::DE_FLOAT32)),
-                                   "Cannot create pad_value Tensor");
+      CHECK_FAIL_RETURN_UNEXPECTED(
+        Tensor::CreateEmpty(TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32), &pad_val),
+        "Cannot create pad_value Tensor");
       pad_val->SetItemAt<float>({}, pad_val_float);
     }
     (void)pad_info->insert({ToString(p.first), {shape, pad_val}});
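
The hunks above show the calling convention this commit introduces: tensor creation goes through static factories that return a Status and take the output pointer as the last parameter, instead of plain constructors. A minimal usage sketch of that convention (the pad values are illustrative; only the signatures come from this diff):

    // Sketch of the new factory-style API.
    std::shared_ptr<Tensor> str_pad;
    std::shared_ptr<Tensor> num_pad;
    // Scalar string tensor: build from a one-element vector.
    Status rc = Tensor::CreateFromVector(std::vector<std::string>{"<pad>"}, TensorShape::CreateScalar(), &str_pad);
    // Scalar numeric tensor: allocate uninitialized, then write the value in.
    rc = Tensor::CreateEmpty(TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32), &num_pad);
    if (rc.IsOk()) rc = num_pad->SetItemAt<float>({}, 0.0f);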
@@ -340,7 +340,7 @@ void bindTensor(py::module *m) {
   (void)py::class_<Tensor, std::shared_ptr<Tensor>>(*m, "Tensor", py::buffer_protocol())
     .def(py::init([](py::array arr) {
       std::shared_ptr<Tensor> out;
-      THROW_IF_ERROR(Tensor::CreateTensor(&out, arr));
+      THROW_IF_ERROR(Tensor::CreateFromNpArray(arr, &out));
       return out;
     }))
     .def_buffer([](Tensor &tensor) {
@@ -364,7 +364,18 @@ void bindTensor(py::module *m) {
       });

   (void)py::class_<TensorShape>(*m, "TensorShape")
-    .def(py::init<py::list>())
+    .def(py::init([](const py::list &list) {
+      std::vector<dsize_t> list_c;
+      for (auto &i : list) {
+        if (!i.is_none()) {
+          list_c.push_back(i.cast<int>());
+        } else {
+          list_c.push_back(TensorShape::kDimUnknown);
+        }
+      }
+      TensorShape out(list_c);
+      return out;
+    }))
     .def("__str__", &TensorShape::ToString)
    .def("as_list", &TensorShape::AsPyList)
     .def("is_known", &TensorShape::known);
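
The new init lambda maps a Python None entry to TensorShape::kDimUnknown, so partially known shapes can be passed in from Python. The same construction expressed directly in C++, as a sketch with illustrative dimensions:

    // Equivalent of the Python list [32, None] under the binding above.
    TensorShape partly_known({32, TensorShape::kDimUnknown});
    // known() is false as soon as any dimension is kDimUnknown.
    bool fully_known = partly_known.known();  // false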
@@ -23,18 +23,35 @@

 namespace mindspore {
 namespace dataset {
-CVTensor::CVTensor(const TensorShape &shape, const DataType &type) : Tensor(shape, type) {
-  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
-}
-
-CVTensor::CVTensor(const TensorShape &shape, const DataType &type, const uchar *data) : Tensor(shape, type, data) {
-  (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
-}
-
 CVTensor::CVTensor(std::shared_ptr<Tensor> tensor) : Tensor(std::move(*tensor)) {
   (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
 }

+Status CVTensor::CreateEmpty(const TensorShape &shape, DataType type, CVTensorPtr *out) {
+  const CVTensorAlloc *alloc = GlobalContext::Instance()->cv_tensor_allocator();
+  *out = std::allocate_shared<CVTensor>(*alloc, shape, type);
+  int64_t byte_size = (*out)->SizeInBytes();
+  // Don't allocate if we have a tensor with no elements.
+  if (byte_size != 0) {
+    RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));
+  }
+
+  return (*out)->MatInit((*out)->GetMutableBuffer(), (*out)->shape_, (*out)->type_, &(*out)->mat_);
+}
+
+Status CVTensor::CreateFromMat(const cv::Mat &mat, CVTensorPtr *out) {
+  TensorPtr out_tensor;
+  cv::Mat mat_local = mat;
+  // if the input Mat's memory is not continuous, copy it to one block of memory
+  if (!mat.isContinuous()) mat_local = mat.clone();
+  TensorShape shape(mat.size, mat_local.type());
+  DataType type = DataType::FromCVType(mat_local.type());
+  RETURN_IF_NOT_OK(CreateFromMemory(shape, type, mat_local.data, &out_tensor));
+  *out = AsCVTensor(out_tensor);
+  return Status::OK();
+}
+
 std::pair<std::array<int, 2>, int> CVTensor::IsValidImage(const TensorShape &shape, const DataType &type) {
   std::array<int, 2> size = {1, 1};
   if (shape.Rank() <= 2 || (shape.Rank() == 3 && shape[2] <= CV_CN_MAX)) {
@@ -57,7 +74,8 @@ std::shared_ptr<CVTensor> CVTensor::AsCVTensor(std::shared_ptr<Tensor> t) {
   if (cv_t != nullptr) {
     return cv_t;
   } else {
-    return std::make_shared<CVTensor>(t);
+    const CVTensorAlloc *alloc = GlobalContext::Instance()->cv_tensor_allocator();
+    return std::allocate_shared<CVTensor>(*alloc, t);
   }
 }

@@ -97,5 +115,13 @@ void CVTensor::Squeeze() {
   Tensor::Squeeze();
   (void)this->MatInit(GetMutableBuffer(), shape_, type_, &mat_);
 }
+
+Status CVTensor::MatAtIndex(const std::vector<dsize_t> &index, cv::Mat *mat) {
+  uchar *start = nullptr;
+  TensorShape remaining({-1});
+  RETURN_IF_NOT_OK(this->StartAddrOfIndex(index, &start, &remaining));
+  RETURN_IF_NOT_OK(this->MatInit(start, remaining, type_, mat));
+  return Status::OK();
+}
 }  // namespace dataset
 }  // namespace mindspore
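
Together these changes route CVTensor creation through allocator-aware factories instead of bare constructors. A usage sketch (the input image is illustrative; CreateFromMat, MatAtIndex, and mat() are as declared in this diff, and the minddata headers are assumed available):

    // Wrap an OpenCV image in a CVTensor, then view one row as a cv::Mat.
    cv::Mat image = cv::Mat::zeros(4, 4, CV_8UC3);  // illustrative input
    CVTensorPtr cv_tensor;
    Status rc = CVTensor::CreateFromMat(image, &cv_tensor);
    if (rc.IsOk()) {
      cv::Mat row_view;
      // MatAtIndex wraps the tensor memory starting at index {0}; no copy is made.
      rc = cv_tensor->MatAtIndex({0}, &row_view);
    }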
@@ -30,56 +30,60 @@

 namespace mindspore {
 namespace dataset {
+using CVTensorPtr = std::shared_ptr<CVTensor>;
 class CVTensor : public Tensor {
  public:
-  // Create an empty CVTensor of shape `shape` and type `type`.
-  // @note The shape and type information should be known and valid.
-  // @param shape TensorShape
-  // @param type DataType
-  CVTensor(const TensorShape &shape, const DataType &type);
+  // Inherit Tensor's constructors
+  using Tensor::Tensor;

-  // Create a CVTensor from a given buffer, shape and type.
-  // @note This constructor allocates a new space in the memory and copies the buffer into it.
-  // @note The buffer should be valid and the shape and type information should be known and valid.
-  // @param shape TensorShape
-  // @param type DataType
-  // @param data unsigned char*, pointer to the data.
-  CVTensor(const TensorShape &shape, const DataType &type, const uchar *data);
-
-  // Create a CVTensor from a given CV::Mat.
-  // @note This constructor allocates a new space in the memory and copies the CV::Mat buffer into it.
-  // @param mat CV::Mat
-  explicit CVTensor(const cv::Mat &mat)
-      : CVTensor(TensorShape(mat.size, mat.type()), DataType::FromCVType(mat.type()), mat.data) {}
-
-  ~CVTensor() = default;
-
-  // Static function to cast a given Tensor as CVTensor. If the input tensor is already of type CVTensor,
-  // this function would be treated as a no-op. For other tensor types, a new CVTensor is created based on the data
-  // provided. The passed Tensor will be invalidated.
-  // @note there is no memory copying here, the buffer will be assigned to the constructed tensor.
-  // @param tensor
-  // @return CVTensor
-  static std::shared_ptr<CVTensor> AsCVTensor(std::shared_ptr<Tensor> tensor);
-
-  // Create a CVTensor from a given tensor. The input tensor will be invalidated (i.e., the shape and type will be
-  // set to unknown and the data buffer will point to null.
-  // @note there is no memory copying here, the buffer will be assigned to the constructed tensor.
-  // @param tensor
+  /// Create a CVTensor from a given tensor. This constructor should not be used directly, use Create* instead.
+  /// The input tensor will be invalidated (i.e., the shape and type will be
+  /// set to unknown and the data buffer will point to null.
+  /// \note there is no memory copying here, the buffer will be assigned to the constructed tensor.
+  /// \param tensor
   explicit CVTensor(std::shared_ptr<Tensor> tensor);

-  // Getter function for the CV::Mat
-  // @return
+  /// Create CV tensor with type and shape. Items of the tensor would be uninitialized.
+  /// \param shape [in] shape of the output tensor
+  /// \param type [in] type of the output tensor
+  /// \param out [out] Generated tensor
+  /// \return Status code
+  static Status CreateEmpty(const TensorShape &shape, DataType type, CVTensorPtr *out);
+
+  /// Create CV tensor from cv::Mat
+  /// \note This constructor allocates a new space in the memory and copies the CV::Mat buffer into it.
+  /// \param mat [in] cv::Mat to be copied into the new tensor.
+  /// \param out [out] Generated tensor
+  /// \return Status code
+  static Status CreateFromMat(const cv::Mat &mat, CVTensorPtr *out);
+
+  ~CVTensor() override = default;
+
+  /// Static function to cast a given Tensor as CVTensor. If the input tensor is already of type CVTensor,
+  /// this function would be treated as a no-op. For other tensor types, a new CVTensor is created based on the data
+  /// provided. The passed Tensor will be invalidated.
+  /// \note the input tensor will be invalidated.
+  /// \note there is no memory copying here, the buffer will be assigned to the constructed tensor.
+  /// \param tensor [in]
+  /// \return CVTensor
+  static std::shared_ptr<CVTensor> AsCVTensor(std::shared_ptr<Tensor> tensor);
+
+  /// Get a reference to the CV::Mat
+  /// \return a reference to the internal CV::Mat
   cv::Mat mat() const { return mat_; }

-  // Static function to check if the passed information (shape and type) can be treated as a valid description
-  // of an image in OpenCV. Moreover, it returns OpenCV shape and type
-  // For example, if the shape is <512,512,3> and type is DE_UINT8, the output would be [512,512] and CV_8UC3.
-  // In case of invalid shape or type, the function will return pair<null,0>
-  // @param shape TensorShape
-  // @param type DataType
-  // @return std::pair of OpenCV shape and type
-  std::pair<std::array<int, 2>, int> IsValidImage(const TensorShape &shape, const DataType &type);
+  /// Get a copy of the CV::Mat
+  /// \return a copy of internal CV::Mat
+  cv::Mat matCopy() const { return mat_.clone(); }
+
+  /// Static function to check if the passed information (shape and type) can be treated as a valid description
+  /// of an image in OpenCV. Moreover, it returns OpenCV shape and type
+  /// For example, if the shape is <512,512,3> and type is DE_UINT8, the output would be [512,512] and CV_8UC3.
+  /// In case of invalid shape or type, the function will return pair<null,0>
+  /// \param shape [in] TensorShape
+  /// \param type [in] DataType
+  /// \return std::pair of OpenCV shape and type
+  static std::pair<std::array<int, 2>, int> IsValidImage(const TensorShape &shape, const DataType &type);

   Status Reshape(const TensorShape &shape) override;
@@ -87,18 +91,19 @@ class CVTensor : public Tensor {

   void Squeeze() override;

-  Status Mat(const std::vector<dsize_t> &index, cv::Mat *mat) {
-    uchar *start = nullptr;
-    TensorShape remaining({-1});
-    RETURN_IF_NOT_OK(this->StartAddrOfIndex(index, &start, &remaining));
-    RETURN_IF_NOT_OK(this->MatInit(start, remaining, type_, mat));
-    return Status::OK();
-  }
+  Status MatAtIndex(const std::vector<dsize_t> &index, cv::Mat *mat);

  private:
+  /// OpenCV Mat object wrapping the raw data of the tensor.
+  /// Modifying the content of the matrix, modifies the tensor.
   cv::Mat mat_;

-  // Initialize CV::Mat with the data_, shape_ and type_
+  /// Create cv::Mat from data, TensorShape and DataType
+  /// \param data [in] Pointer to the data in memory.
+  /// \param shape [in] Shape of the tensor.
+  /// \param type [in] Type of the tensor.
+  /// \param mat [out] cv::Mat initialized with the provided data.
+  /// \return Status code
   Status MatInit(uchar *data, const TensorShape &shape, const DataType &type, cv::Mat *mat);
 };
 }  // namespace dataset
@@ -284,6 +284,11 @@ inline DataType DataType::FromCType<std::string_view>() {
   return DataType(DataType::DE_STRING);
 }

+template <>
+inline DataType DataType::FromCType<std::string>() {
+  return DataType(DataType::DE_STRING);
+}
+
 template <>
 inline bool DataType::IsLooselyCompatible<bool>() const {
   return type_ == DataType::DE_BOOL;
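
This specialization lets the templated factories in tensor.h later in this diff (CreateScalar, CreateFromVector) deduce DE_STRING from owned std::string values, not just from std::string_view. A minimal sketch of the deduction:

    // DataType deduction used by the templated factories.
    DataType t1 = DataType::FromCType<std::string>();       // DE_STRING (added here)
    DataType t2 = DataType::FromCType<std::string_view>();  // DE_STRING (pre-existing)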
@@ -59,49 +59,11 @@ Tensor::Tensor(const TensorShape &shape, const DataType &type) : shape_(shape),
   data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
 }

-Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data) : Tensor(shape, type) {
-  if (type.IsNumeric()) {
-    // If the data pointer was given, then we can also populate the tensor with data
-    if (data != nullptr) {
-      // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
-      int64_t byte_size = this->SizeInBytes();
-      Status s = this->AllocateBuffer(byte_size);  // Allocates data_ inside itself
-      if (s.IsOk() && data_ != nullptr) {
-        int ret_code = memcpy_s(data_, byte_size, data, byte_size);
-        if (ret_code != 0) {
-          MS_LOG(ERROR) << "Failed to copy data into Tensor!";
-        }
-      } else {
-        MS_LOG(ERROR) << "Failed to create memory for Tensor!";
-      }
-    }
-  } else {
-    MS_LOG(ERROR) << "Type should be numeric to use this constructor.";
-  }
-}
-
-Tensor::Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data, const dsize_t &length)
-    : Tensor(shape, type) {
-  // If the data pointer was given, then we can also populate the tensor with data
-  if (data != nullptr) {
-    // Allocates data_ inside itself
-    Status s = AllocateBuffer(length);
-    if (s.IsError()) {
-      MS_LOG(ERROR) << "Failed to create memory for Tensor!";
-    }
-    if (data_ != nullptr) {
-      int ret_code = memcpy_s(data_, length, data, length);
-      if (ret_code != 0) {
-        MS_LOG(ERROR) << "Failed to copy data into Tensor!";
-      }
-    }
-  }
-}
-
 Tensor::Tensor(Tensor &&other) noexcept
     : shape_(other.shape()),
       type_(other.type()),
       data_(other.GetMutableBuffer()),
       data_end_(other.data_end_),
       data_allocator_(std::move(other.data_allocator_)) {
   other.Invalidate();
 }
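
The deleted copy-in constructors could only report failure through MS_LOG; their replacement, CreateFromMemory, surfaces the same errors through the returned Status. A sketch of the equivalent call (the input array is illustrative):

    // Before: Tensor t(shape, type, data);  errors were only logged.
    // After: errors propagate through the Status.
    const float values[4] = {1.0f, 2.0f, 3.0f, 4.0f};  // illustrative data
    std::shared_ptr<Tensor> t;
    Status rc = Tensor::CreateFromMemory(TensorShape({4}), DataType(DataType::DE_FLOAT32),
                                         reinterpret_cast<const uchar *>(values), &t);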
@@ -117,118 +79,61 @@ Tensor &Tensor::operator=(Tensor &&other) noexcept {
   }
   return *this;
 }

-Tensor::Tensor(const std::vector<std::string> &strings, const TensorShape &shape)
-    : Tensor(TensorShape({static_cast<dsize_t>(strings.size())}), DataType(DataType::DE_STRING)) {
-  auto length_sum = [](dsize_t sum, const std::string &s) { return s.length() + sum; };
-  dsize_t total_length = std::accumulate(strings.begin(), strings.end(), 0, length_sum);
-
-  // total bytes needed = offset array + strings
-  // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
-  // strings will be null-terminated --> need 1 extra byte per element
-  dsize_t num_bytes = (kOffsetSize + 1) * shape_.NumOfElements() + kOffsetSize + total_length;
-
-  data_ = data_allocator_->allocate(num_bytes);
-
-  auto offset_arr = reinterpret_cast<offset_t *>(data_);
-  uchar *buf = GetStringsBuffer();
-
-  offset_t offset = buf - data_;  // the first string will start here
-  uint32_t i = 0;
-  for (const auto &str : strings) {
-    // insert the start index of the string.
-    offset_arr[i++] = offset;
-    // total bytes are reduced by kOffsetSize
-    num_bytes -= kOffsetSize;
-    // insert actual string
-    int ret_code = memcpy_s(data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
-    if (ret_code != 0) MS_LOG(ERROR) << "Cannot copy string into Tensor";
-    // next string will be stored right after the current one.
-    offset = offset + str.length() + 1;
-    // total bytes are reduced by the length of the string
-    num_bytes -= str.length() + 1;
-  }
-  // store one more offset value so we can get the length of the last string
-  // length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
-  offset_arr[i] = offset;
-
-  this->data_end_ = data_ + offset_arr[i];
-
-  MS_ASSERT(num_bytes == 0);
-  if (shape.known()) Tensor::Reshape(shape);
-}
+Status Tensor::CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out) {
+  CHECK_FAIL_RETURN_UNEXPECTED(shape.known(), "Invalid shape.");
+  CHECK_FAIL_RETURN_UNEXPECTED(type != DataType::DE_UNKNOWN, "Invalid data type.");
+  const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
+  *out = std::allocate_shared<Tensor>(*alloc, shape, type);
+  // if it's a string tensor and it has no elements, Just initialize the shape and type.
+  if (!type.IsNumeric() && shape.NumOfElements() == 0) {
+    return Status::OK();
+  }
+  CHECK_FAIL_RETURN_UNEXPECTED(type.IsNumeric(), "Number of elements is not 0. The type should be numeric.");
+
+  int64_t byte_size = (*out)->SizeInBytes();
+  // Don't allocate if we have a tensor with no elements.
+  if (byte_size != 0) {
+    RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));
+  }
+
+  return Status::OK();
+}
+
+Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out) {
+  RETURN_IF_NOT_OK(CreateEmpty(shape, type, out));
+  if (src != nullptr) {
+    // Given the shape/type of this tensor, compute the data size and copy in the input bytes.
+    int64_t byte_size = (*out)->SizeInBytes();
+    int ret_code = memcpy_s((*out)->data_, byte_size, src, byte_size);
+    CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to copy data into tensor.");
+  }
+  return Status::OK();
+}

-Tensor::Tensor(const dataengine::BytesList &bytes_list, const TensorShape &shape)
-    : Tensor(TensorShape({static_cast<dsize_t>(bytes_list.value_size())}), DataType(DataType::DE_STRING)) {
-  // total bytes needed = offset array + strings
-  // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
-  // strings will be null-terminated --> need 1 extra byte per element
-  dsize_t num_bytes = (kOffsetSize)*shape_.NumOfElements() + kOffsetSize + bytes_list.ByteSizeLong();
-
-  data_ = data_allocator_->allocate(num_bytes);
-
-  auto offset_arr = reinterpret_cast<offset_t *>(data_);
-  uchar *buf = GetStringsBuffer();
-
-  offset_t offset = buf - data_;  // the first string will start here
-  uint32_t i = 0;
-  for (; i < bytes_list.value_size(); i++) {
-    const std::string &str = bytes_list.value(i);
-    // insert the start index of the string.
-    offset_arr[i] = offset;
-    // total bytes are reduced by kOffsetSize
-    num_bytes -= kOffsetSize;
-    // insert actual string
-    int ret_code = memcpy_s(data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
-    if (ret_code != 0) {
-      MS_LOG(ERROR) << "Cannot copy string into Tensor";
-    }
-    // next string will be stored right after the current one.
-    offset = offset + str.length() + 1;
-    // total bytes are reduced by the length of the string
-    num_bytes -= str.length() + 1;
-  }
-  // store one more offset value so we can get the length of the last string
-  // length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
-  offset_arr[i] = offset;
-
-  data_end_ = data_ + offset_arr[i];
-
-  MS_ASSERT(num_bytes == 0);
-  if (shape.known()) Tensor::Reshape(shape);
-}
-
-Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, TensorImpl tensor_impl, const TensorShape &shape,
-                            DataType type, const unsigned char *data) {
-  if (!shape.known()) {
-    RETURN_STATUS_UNEXPECTED("Invalid shape.");
-  }
-  if (type == DataType::DE_UNKNOWN) {
-    RETURN_STATUS_UNEXPECTED("Invalid data type.");
-  }
-
-  switch (tensor_impl) {
-    case TensorImpl::kFlexible: {
-      // The flex tensor is really just the base class tensor implementation
-      const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
-      *ptr = std::allocate_shared<Tensor>(*alloc, shape, type, data);
-      break;
-    }
-    case TensorImpl::kCv: {
-      const CVTensorAlloc *alloc = GlobalContext::Instance()->cv_tensor_allocator();
-      *ptr = std::allocate_shared<CVTensor>(*alloc, shape, type, data);
-      break;
-    }
-    default: {
-      std::string err_msg("Invalid tensor implementation type.");
-      RETURN_STATUS_UNEXPECTED(err_msg);
-    }
-  }
-  return Status::OK();  // returns base-class shared_ptr
-}
+Status Tensor::CreateFromMemory(const TensorShape &shape, const DataType &type, const unsigned char *src,
+                                const dsize_t &length, TensorPtr *out) {
+  CHECK_FAIL_RETURN_UNEXPECTED(src != nullptr, "Pointer to source data is null.");
+  const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
+  *out = std::allocate_shared<Tensor>(*alloc, shape, type);
+  if (type.IsNumeric()) {
+    dsize_t calculated_length = (*out)->SizeInBytes();
+    CHECK_FAIL_RETURN_UNEXPECTED(calculated_length == length, "Length of source data does not match the shape.");
+  } else {
+    // min_length is the length of a tensor with empty strings
+    // min_length = the number of bytes needed to store the offsets + 1 byte for each element
+    dsize_t min_length = (shape.NumOfElements() + 1) * kOffsetSize + shape.NumOfElements();
+    CHECK_FAIL_RETURN_UNEXPECTED(min_length <= length, "Length of source data does not match the shape.");
+  }
+
+  RETURN_IF_NOT_OK((*out)->AllocateBuffer(length));
+  int ret_code = memcpy_s((*out)->data_, length, src, length);
+  CHECK_FAIL_RETURN_UNEXPECTED(ret_code == 0, "Failed to copy data into tensor.");
+
+  return Status::OK();
+}

 #ifdef ENABLE_PYTHON
-Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr) {
+Status Tensor::CreateFromNpString(py::array arr, std::shared_ptr<Tensor> *out) {
   std::vector<dsize_t> shape;
   for (dsize_t i = 0; i < arr.ndim(); i++) {
     shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
@@ -244,34 +149,38 @@ Status Tensor::CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::arr

   arr.resize(shape);  // resize arr back to the original shape

-  return CreateTensor(ptr, strings, TensorShape{shape});
+  return CreateFromVector(strings, TensorShape{shape}, out);
 }

-Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {
+Status Tensor::CreateFromNpArray(const py::array &arr, std::shared_ptr<Tensor> *out) {
   if (DataType::FromNpArray(arr) == DataType::DE_STRING) {
-    return CreateTensorFromNumpyString(ptr, arr);
+    return CreateFromNpString(arr, out);
   }
   const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
-  *ptr = std::allocate_shared<Tensor>(*alloc, TensorShape({}), DataType(DataType::DE_UNKNOWN));
+  *out = std::allocate_shared<Tensor>(*alloc, TensorShape::CreateScalar(), DataType(DataType::DE_UNKNOWN));

   std::vector<dsize_t> shape;
   for (dsize_t i = 0; i < arr.ndim(); i++) {
     shape.push_back(static_cast<dsize_t>(arr.shape()[i]));
   }

-  (*ptr)->shape_ = TensorShape(shape);
-  (*ptr)->type_ = DataType::FromNpArray(arr);
-  if (!(*ptr)->shape_.known()) RETURN_STATUS_UNEXPECTED("Invalid shape.");
+  (*out)->shape_ = TensorShape(shape);
+  (*out)->type_ = DataType::FromNpArray(arr);
+  if (!(*out)->shape_.known()) RETURN_STATUS_UNEXPECTED("Invalid shape.");

-  if ((*ptr)->type_ == DataType::DE_UNKNOWN) RETURN_STATUS_UNEXPECTED("Invalid data type.");
+  if ((*out)->type_ == DataType::DE_UNKNOWN) RETURN_STATUS_UNEXPECTED("Invalid data type.");

   std::shared_ptr<MemoryPool> global_pool = GlobalContext::Instance()->mem_pool();
-  (*ptr)->data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
-  int64_t byte_size = (*ptr)->SizeInBytes();
-  RETURN_IF_NOT_OK((*ptr)->AllocateBuffer(byte_size));
+  (*out)->data_allocator_ = std::make_unique<Allocator<unsigned char>>(global_pool);
+  int64_t byte_size = (*out)->SizeInBytes();
+  if (byte_size == 0) {
+    return Status::OK();
+  }
+
+  RETURN_IF_NOT_OK((*out)->AllocateBuffer(byte_size));

   unsigned char *data = static_cast<unsigned char *>(arr.request().ptr);
-  if ((*ptr)->data_ == nullptr) {
+  if ((*out)->data_ == nullptr) {
     RETURN_STATUS_UNEXPECTED("Failed to create memory for Tensor.");
   }
@@ -282,61 +191,89 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr) {

   // check if strides are contiguous
   bool is_strided = false;
-  dsize_t count = (*ptr)->shape_.NumOfElements();
+  dsize_t count = (*out)->shape_.NumOfElements();
   for (size_t i = 0; i < shape.size(); i++) {
     count /= shape[i];
-    if (strides[i] != (*ptr)->type_.SizeInBytes() * count) {
+    if (strides[i] != (*out)->type_.SizeInBytes() * count) {
       is_strided = true;
       break;
     }
   }

   if (is_strided) {
-    RETURN_IF_NOT_OK(CopyStridedArray((*ptr)->data_, data, shape, strides, (*ptr)->type_.SizeInBytes()));
+    RETURN_IF_NOT_OK(CopyStridedArray((*out)->data_, data, shape, strides, (*out)->type_.SizeInBytes()));
   } else {
-    int ret_code = memcpy_s((*ptr)->data_, byte_size, data, byte_size);
+    int ret_code = memcpy_s((*out)->data_, byte_size, data, byte_size);
     if (ret_code != 0) {
       RETURN_STATUS_UNEXPECTED("Failed to copy data into Tensor.");
     }
   }

-  return Status::OK();  // returns base-class shared_ptr
+  return Status::OK();
 }
 #endif

-Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
-                            const TensorShape &shape) {
-  const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
-  *ptr = std::allocate_shared<Tensor>(*alloc, strings, shape);
-  return Status::OK();
-}
-
-Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
-                            const TensorShape &shape) {
-  const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
-  *ptr = std::allocate_shared<Tensor>(*alloc, bytes_list, shape);
-  return Status::OK();
-}
+Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out) {
+  const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
+  *out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(bytes_list.value_size())}),
+                                      DataType(DataType::DE_STRING));
+  // total bytes needed = offset array + strings
+  // offset array needs to store one offset var per element + 1 extra to get the length of the last string.
+  // strings will be null-terminated --> need 1 extra byte per element
+  dsize_t num_bytes = (kOffsetSize) * (*out)->shape_.NumOfElements() + kOffsetSize + bytes_list.ByteSizeLong();
+
+  (*out)->data_ = (*out)->data_allocator_->allocate(num_bytes);
+
+  auto offset_arr = reinterpret_cast<offset_t *>((*out)->data_);
+  uchar *buf = (*out)->GetStringsBuffer();
+
+  offset_t offset = buf - (*out)->data_;  // the first string will start here
+  uint32_t i = 0;
+  for (; i < bytes_list.value_size(); i++) {
+    const std::string &str = bytes_list.value(i);
+    // insert the start index of the string.
+    offset_arr[i] = offset;
+    // total bytes are reduced by kOffsetSize
+    num_bytes -= kOffsetSize;
+    // insert actual string
+    int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
+    if (ret_code != 0) {
+      MS_LOG(ERROR) << "Cannot copy string into Tensor";
+    }
+    // next string will be stored right after the current one.
+    offset = offset + str.length() + 1;
+    // total bytes are reduced by the length of the string
+    num_bytes -= str.length() + 1;
+  }
+  // store one more offset value so we can get the length of the last string
+  // length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
+  offset_arr[i] = offset;
+
+  (*out)->data_end_ = (*out)->data_ + offset_arr[i];
+
+  MS_ASSERT(num_bytes == 0);
+  (*out)->Reshape(shape);
+  return Status::OK();
+}

-Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const std::string &file_path) {
+Status Tensor::CreateFromFile(const std::string &path, std::shared_ptr<Tensor> *out) {
   std::ifstream fs;
-  fs.open(file_path, std::ios::binary | std::ios::in);
-  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + file_path);
+  fs.open(path, std::ios::binary | std::ios::in);
+  CHECK_FAIL_RETURN_UNEXPECTED(!fs.fail(), "Fail to open file: " + path);
   int64_t num_bytes = fs.seekg(0, std::ios::end).tellg();
   CHECK_FAIL_RETURN_UNEXPECTED(fs.seekg(0, std::ios::beg).good(), "Fail to find size of file");
-  RETURN_IF_NOT_OK(
-    Tensor::CreateTensor(ptr, TensorImpl::kFlexible, TensorShape{num_bytes}, DataType(DataType::DE_UINT8)));
-  int64_t written_bytes = fs.read(reinterpret_cast<char *>((*ptr)->GetMutableBuffer()), num_bytes).gcount();
+  RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape{num_bytes}, DataType(DataType::DE_UINT8), out));
+  int64_t written_bytes = fs.read(reinterpret_cast<char *>((*out)->GetMutableBuffer()), num_bytes).gcount();
   CHECK_FAIL_RETURN_UNEXPECTED(written_bytes == num_bytes && fs.good(), "Error in writing to tensor");
   fs.close();
   return Status::OK();
 }

-Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
-                            const TensorShape &shape, const DataType &type, dsize_t pad_size) {
-  RETURN_IF_NOT_OK(Tensor::CreateTensor(ptr, TensorImpl::kFlexible, shape, type));
+Status Tensor::CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
+                                  const DataType &type, dsize_t pad_size, TensorPtr *out) {
+  RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, type, out));

-  unsigned char *current_tensor_addr = (*ptr)->GetMutableBuffer();
+  unsigned char *current_tensor_addr = (*out)->GetMutableBuffer();
   int64_t tensor_bytes_remaining = bytes_list.value_size() * pad_size;

   for (int i = 0; i < bytes_list.value_size(); i++) {
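
CreateFromByteList lays out a single buffer holding the offset array followed by the null-terminated strings, the same layout the removed string constructors used. A worked byte count for the two strings "abc" and "de", assuming the 4-byte offset_t used throughout this diff:

    // Offset array: 3 entries (2 elements + 1 sentinel) * 4 bytes = 12 bytes,
    // so the first string starts at byte offset 12.
    //   bytes 0-11 : offsets {12, 16, 19}
    //   bytes 12-15: "abc\0"   bytes 16-18: "de\0"
    // Total: 12 + 4 + 3 = 19 bytes.
    // Length of "de" = offsets[2] - offsets[1] - 1 = 19 - 16 - 1 = 2.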
@@ -368,7 +305,7 @@ Status Tensor::CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::Byte
 // Here we convert array C to array A, by memcpy index by index (Note that not all elements in C are copied)
 Status Tensor::CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
                                 std::vector<dsize_t> strides, uint8_t type_size) {
-  dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<dsize_t>());
+  dsize_t size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<>());
   for (dsize_t i = 0; i < size; ++i) {
     dsize_t offset = 0;
     dsize_t count = i;
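
CopyStridedArray recomputes each element's source offset from the strides, so NumPy arrays with padded rows copy correctly into contiguous tensor memory. A worked example of the mapping, under an assumed 2x3 float32 source whose rows are padded to 16 bytes:

    // Assumed source: shape {2, 3}, 4-byte elements, byte strides {16, 4}.
    // Flat element i = 4 has row-major indices {1, 1}, so:
    //   src offset = 1 * 16 + 1 * 4 = 20 bytes
    //   dst offset = i * type_size = 4 * 4 = 16 bytes
    // Repeating this for every i yields a packed copy without the row padding.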
@@ -429,29 +366,29 @@ void Tensor::PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) c
   MS_ASSERT(data_);

   switch (type_.value()) {
-    CASE_PRINT_HEX(DataType::DE_BOOL, bool);
+    CASE_PRINT_HEX(DataType::DE_BOOL, bool)

-    CASE_PRINT_HEX(DataType::DE_INT8, int8_t);
+    CASE_PRINT_HEX(DataType::DE_INT8, int8_t)

-    CASE_PRINT_HEX(DataType::DE_UINT8, uint8_t);
+    CASE_PRINT_HEX(DataType::DE_UINT8, uint8_t)

-    CASE_PRINT(DataType::DE_INT16, int16_t);
+    CASE_PRINT(DataType::DE_INT16, int16_t)

-    CASE_PRINT(DataType::DE_UINT16, uint16_t);
+    CASE_PRINT(DataType::DE_UINT16, uint16_t)

-    CASE_PRINT(DataType::DE_INT32, int32_t);
+    CASE_PRINT(DataType::DE_INT32, int32_t)

-    CASE_PRINT(DataType::DE_UINT32, uint32_t);
+    CASE_PRINT(DataType::DE_UINT32, uint32_t)

-    CASE_PRINT(DataType::DE_INT64, int64_t);
+    CASE_PRINT(DataType::DE_INT64, int64_t)

-    CASE_PRINT(DataType::DE_UINT64, uint64_t);
+    CASE_PRINT(DataType::DE_UINT64, uint64_t)

-    CASE_PRINT(DataType::DE_FLOAT16, float16);
+    CASE_PRINT(DataType::DE_FLOAT16, float16)

-    CASE_PRINT(DataType::DE_FLOAT32, float);
+    CASE_PRINT(DataType::DE_FLOAT32, float)

-    CASE_PRINT(DataType::DE_FLOAT64, double);
+    CASE_PRINT(DataType::DE_FLOAT64, double)

     case DataType::DE_STRING: {
       std::string_view o{""};
@@ -501,50 +438,14 @@ void Tensor::Print(std::ostream &out) const {
   }
 }
 Status Tensor::AllocateBuffer(const dsize_t &length) {
+  RETURN_UNEXPECTED_IF_NULL(data_allocator_);
   if (data_ == nullptr) {
-    if (data_allocator_ != nullptr) {
-      data_ = data_allocator_->allocate(length);
-      RETURN_UNEXPECTED_IF_NULL(data_);
-      data_end_ = data_ + length;
-    } else {
-      data_ = static_cast<unsigned char *>(malloc(length));
-      data_end_ = data_ + length;
-      RETURN_UNEXPECTED_IF_NULL(data_);
-    }
+    data_ = data_allocator_->allocate(length);
+    CHECK_FAIL_RETURN_UNEXPECTED(data_ != nullptr, "Failed to allocate memory for tensor.");
+    data_end_ = data_ + length;
   }
   return Status::OK();
 }
-const unsigned char *Tensor::GetBuffer() const {
-  // This version cannot modify anything. data_ could possibly be null.
-  return data_;
-}
-
-// check for empty
-bool Tensor::HasData() const {
-  if (data_ == nullptr) {
-    return true;
-  } else {
-    return false;
-  }
-}
-
-unsigned char *Tensor::GetMutableBuffer() {
-  if (!shape_.known() || type_ == DataType::DE_UNKNOWN) {
-    return nullptr;
-  }
-  // If the data area is already created, return the pointer to it
-  if (data_ != nullptr) {
-    return data_;
-  } else {
-    // If the data area is not created, then identify the memory size based
-    // on the shape and type and allocate it.
-    if (this->AllocateBuffer(this->SizeInBytes()).IsOk()) {
-      return data_;
-    } else {
-      return nullptr;
-    }
-  }
-}
-
 Status Tensor::Reshape(const TensorShape &shape) {
   if (shape.NumOfElements() == shape_.NumOfElements()) {
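
Note the semantics flip here: the removed out-of-line HasData() returned true when data_ was null, whereas the new inline definition in tensor.h (later in this diff) returns data_ != nullptr, matching its name. A sketch of the empty-tensor handling this enables:

    // Sketch of the empty-tensor checks this commit adds.
    std::shared_ptr<Tensor> t;
    Status rc = Tensor::CreateEmpty(TensorShape({0}), DataType(DataType::DE_FLOAT32), &t);
    // Zero elements -> zero bytes -> no buffer is allocated, so HasData() is false.
    if (rc.IsOk() && !t->HasData()) {
      // handle the empty tensor without touching its (null) buffer
    }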
@@ -628,7 +529,7 @@ Status Tensor::InsertTensor(const std::vector<dsize_t> &ind, const std::shared_p
   err_msg += (ind.size() + tensor->Rank() != this->Rank()) ? "[Tensor] incorrect index\n" : "";
   err_msg += tensor->type().SizeInBytes() != this->type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : "";
   uchar *start_addr_of_ind = nullptr;
-  TensorShape remaining_shape({-1});
+  TensorShape remaining_shape = TensorShape::CreateUnknownRankShape();
   err_msg += (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : "";
   err_msg += !(remaining_shape == tensor->shape()) ? "[Tensor] memory error\n" : "";
   if (!err_msg.empty()) {
@@ -697,7 +598,7 @@ Status Tensor::ExpandDim(const dsize_t &axis) {
   return Status::OK();
 }

-std::vector<dsize_t> Tensor::Strides() {
+std::vector<dsize_t> Tensor::Strides() const {
   std::vector<dsize_t> strides = shape_.Strides();
   uint8_t size = type_.SizeInBytes();
   std::transform(strides.begin(), strides.end(), strides.begin(), [&size](const auto &c) { return c * size; });
@@ -765,7 +666,6 @@ Status Tensor::GetItemAt(std::string_view *o, const std::vector<dsize_t> &index)
 #ifdef ENABLE_PYTHON
 // return data as numpy, should return status
 Status Tensor::GetDataAsNumpy(py::array *data) {
-  RETURN_UNEXPECTED_IF_NULL(data_);
   RETURN_UNEXPECTED_IF_NULL(data);
   if (type_ == DataType::DE_BOOL) {
     *data = py::array_t<bool>(shape_.AsVector(), reinterpret_cast<bool *>(data_));
@@ -974,7 +874,9 @@ Status Tensor::CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vect
 }
 Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices) {
   CHECK_FAIL_RETURN_UNEXPECTED(shape_.Rank() == 1, "Currently Slice work with rank 1 tensors only.");
-  CHECK_FAIL_RETURN_UNEXPECTED(!indices.empty(), "Indices are empty, generated tensor would be empty.");
+  if (indices.empty()) {
+    return CreateEmpty(TensorShape({0}), type_, out);
+  }
   if (type_.IsNumeric()) {
     return SliceNumeric(out, indices);
   } else {
@@ -982,8 +884,7 @@ Status Tensor::Slice(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &i
   }
 }
 Status Tensor::SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices) {
-  RETURN_IF_NOT_OK(
-    CreateTensor(out, TensorImpl::kFlexible, TensorShape({static_cast<dsize_t>(indices.size())}), type_));
+  RETURN_IF_NOT_OK(CreateEmpty(TensorShape({static_cast<dsize_t>(indices.size())}), type_, out));
   (*out)->GetMutableBuffer();
   dsize_t out_index = 0;
   dsize_t dim_length = shape_[0];
@@ -1027,7 +928,7 @@ Status Tensor::SliceString(std::shared_ptr<Tensor> *out, const std::vector<dsize
     GetItemAt(&sv, {cur_index});
     strings.emplace_back(sv);
   }
-  return CreateTensor(out, strings);
+  return CreateFromVector(strings, TensorShape({static_cast<dsize_t>(strings.size())}), out);
 }

 }  // namespace dataset
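
Slice used to reject an empty index list outright; it now returns a legitimate zero-element tensor, in line with the commit's empty-tensor support. A sketch with illustrative values:

    std::shared_ptr<Tensor> src;
    std::shared_ptr<Tensor> sliced;
    Status rc = Tensor::CreateFromVector(std::vector<float>{1.0f, 2.0f, 3.0f}, &src);
    if (rc.IsOk()) rc = src->Slice(&sliced, {});  // previously an error; now a valid tensor of shape {0}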
@@ -33,6 +33,7 @@
 #include "pybind11/stl.h"
 #endif

 #include "common/utils.h"
+#include "minddata/dataset/core/constants.h"
 #include "minddata/dataset/core/data_type.h"
 #include "minddata/dataset/core/tensor_shape.h"
@@ -50,170 +51,155 @@ class Allocator;

 using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>;
 using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>;  // An allocator shared_ptr for Tensors
+using offset_t = uint32_t;  // type of offset values to store strings locations
+using TensorPtr = std::shared_ptr<Tensor>;

 class Tensor {
  public:
   Tensor() = delete;

-  // Create a new tensor, does not internally allocate storage. This constructor is protected, use CreateTensor.
-  // @note The shape and type information should be known and valid.
-  // @param shape TensorShape
-  // @param type DataType
-  Tensor(const TensorShape &shape, const DataType &type);
-
-  // Create a new tensor, allocates storage and copies in data. This constructor is protected, use CreateTensor.
-  // @note The buffer should be valid and the shape and type information should be known and valid.
-  // @param shape TensorShape
-  // @param type DataType
-  // @param data unsigned char*, pointer to the data.
-  Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data);
-
-  Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data, const dsize_t &length);
-
   Tensor(const Tensor &other) = delete;

   Tensor &operator=(const Tensor &other) = delete;

+  /// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead
+  /// \note The shape and type information should be known and valid
+  /// \note The constructor does not allocate data
+  /// \param shape TensorShape
+  /// \param type DataType
+  Tensor(const TensorShape &shape, const DataType &type);
+
+  /// Move constructor
+  /// \param other Tensor to be moved
   Tensor(Tensor &&other) noexcept;

+  /// Move assignment operator
+  /// \param other Tensor to be moved
   Tensor &operator=(Tensor &&other) noexcept;

   Status AllocateBuffer(const dsize_t &length);

+  /// Create a numeric tensor with type and shape. Items of the tensor would be uninitialized.
+  /// \param[in] shape shape of the output tensor
+  /// \param[in] type type of the output tensor
+  /// \param[out] out Generated tensor
+  /// \return Status code
+  static Status CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out);

-  // type of offset values to store strings information
-  using offset_t = uint32_t;
   // const of the size of the offset variable
   static constexpr uint8_t kOffsetSize = sizeof(offset_t);
-  // Tensor base class which holds the data in an unsigned char* buffer.

+  /// Create a numeric tensor from a pointer in memory. Length of the source data is determined from the shape and type.
+  /// Data will be copied into the new created tensor.
+  /// \param[in] shape shape of the output tensor
+  /// \param[in] type type of the output tensor
+  /// \param[in] src pointer to the source data
+  /// \param[out] out Generated tensor
+  /// \return Status code
+  static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out);

-  // Construct a scalar string Tensor
-  explicit Tensor(const std::string &str) : Tensor(std::vector<std::string>{str}, TensorShape::CreateScalar()) {}
+  /// Create a tensor from a pointer in memory and length. Data will be copied into the new created tensor.
+  /// \param[in] shape shape of the output tensor
+  /// \param[in] type type of the output tensor
+  /// \param[in] src pointer to the source data
+  /// \param[in] length length of the src data
+  /// \param[out] out Generated tensor
+  /// \return Status code
+  static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src,
+                                 const dsize_t &length, TensorPtr *out);

-  // Construct a tensor from a list of strings. Reshape the tensor with `shape` if given, otherwise assume the shape is
-  // the size of the vector `strings`.
-  // The memory layout of a Tensor of strings consists of the Offset_array followed by the strings.
-  // The offset array will store one extra value to find the length of the last string.
-  // OFFSET1, OFFSET2, ..., OFFSETn+1, STRING1, STRING2, ..., STRINGn
-  // The value of each offset is the start index of the corresponding string
-  // Offsets are of type offset_t
-  // strings will be null-terminated
-  // example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
-  // |----------------------------------------------------------------|
-  // |  OFFSET ARRAY   |                  STRINGS                     |
-  // | bytes 0-3 | bytes 3-6 | bytes 7-10 | bytes 11-14 | bytes 15-17 |
-  // |    11     |    15     |     18     |    abc\0    |    de\0     |
-  // |----------------------------------------------------------------|
-  explicit Tensor(const std::vector<std::string> &strings,
-                  const TensorShape &shape = TensorShape::CreateUnknownRankShape());
-
-  // Same as Tensor(vector<string>) but the input is protobuf bytelist
-  explicit Tensor(const dataengine::BytesList &bytes_list,
-                  const TensorShape &shape = TensorShape::CreateUnknownRankShape());
-
-  // A static factory method to create the given flavour of derived Tensor
-  // Returns the base class reference for the Tensor.
-  // @param ptr output argument to hold the created Tensor of given tensor_impl
-  // @param tensor_impl - which implementation of Tensor
-  // @param shape - shape of the tensor
-  // @param type - datatype of the tensor
-  // @param data - data to be copied to Tensor new allocation
-  // @return Status Code
-  static Status CreateTensor(std::shared_ptr<Tensor> *, TensorImpl tensor_impl, const TensorShape &shape, DataType type,
-                             const unsigned char *data = nullptr);
-
-  // Create a copy of the input tensor
-  // @param out [out] output tensor to be generated
-  // @param in [in] original tensor to be copied
-  // @return Status
-  static Status CreateTensor(std::shared_ptr<Tensor> *out, const std::shared_ptr<Tensor> &in) {
-    const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
-    *out = std::allocate_shared<Tensor>(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes());
-    return Status::OK();
+  /// Create a copy of the input tensor
+  /// \param[in] in original tensor to be copied
+  /// \param[out] out output tensor to be generated
+  /// \return Status
+  static Status CreateFromTensor(const TensorPtr &in, TensorPtr *out) {
+    return CreateFromMemory(in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes(), out);
   }

 #ifdef ENABLE_PYTHON
-  // A static factory method to create a Tensor from a given py::array.
-  // @param ptr output argument to hold the created Tensor
-  // @param arr py::array
-  // @return Status Code
-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr);
-
-  // Helper function to create a tensor from Numpy of strings
-  static Status CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr);
+  /// Create a Tensor from a given py::array
+  /// \param[in] arr py::array
+  /// \param[out] out Created tensor
+  /// \return Status Code
+  static Status CreateFromNpArray(const py::array &arr, TensorPtr *out);
 #endif

-  // A static factory method to create a Tensor from a given list of strings.
-  // @param ptr output argument to hold the created Tensor
-  // @param strings elements of the tensor
-  // @param shape shape of the tensor
-  // @return Status Code
-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
-                             const TensorShape &shape = TensorShape::CreateUnknownRankShape());
+  /// Create a tensor of type DE_STRING from a BytesList.
+  /// \param[in] bytes_list protobuf's Bytelist
+  /// \param[in] shape shape of the output tensor
+  /// \param[out] out created Tensor
+  /// \return Status Code
+  static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out);

-  // create tensor from protobuf bytelist with strings
-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
-                             const TensorShape &shape);
+  /// Create a tensor of type UINT8 or INT8 from a BytesList.
+  /// The tensor will be padded with ' ' to reach the required pad_size.
+  /// \param[in] bytes_list protobuf's Bytelist
+  /// \param[in] shape shape of the output tensor
+  /// \param[in] type type of created tensor. Should be DE_UINT8 or INT8
+  /// \param[in] pad_size The size of the tensor after padding
+  /// \param[out] out created Tensor
+  /// \return Status Code
+  static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
+                                   const DataType &type, dsize_t pad_size, TensorPtr *out);

-  // A static factory method to create a Tensor from a given list of numbers.
-  // @param ptr output argument to hold the created Tensor
-  // @param items elements of the tensor
-  // @param shape shape of the tensor
-  // @return Status Code
+  /// Create a Tensor from a given list of values.
+  /// \tparam type of the values to be inserted.
+  /// \param[in] items elements of the tensor
+  /// \param[in] shape shape of the output tensor
+  /// \param[out] out output argument to hold the created Tensor
+  /// \return Status Code
   template <typename T>
-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<T> &items,
-                             const TensorShape &shape_req = TensorShape::CreateUnknownRankShape()) {
+  static Status CreateFromVector(const std::vector<T> &items, const TensorShape &shape, TensorPtr *out) {
+    CHECK_FAIL_RETURN_UNEXPECTED(
+      items.size() == shape.NumOfElements(),
+      "Number of elements in the vector does not match the number of elements of the shape required");
     DataType type = DataType::FromCType<T>();
+    // if items is empty, items_ptr would be nullptr. CreateFromMemory will handle this case.
     auto items_ptr = reinterpret_cast<const uchar *>(&items[0]);
-    TensorShape shape = shape_req;
-    if (!shape.known()) {
-      shape = TensorShape({static_cast<dsize_t>(items.size())});
-    }
-    return CreateTensor(ptr, TensorImpl::kFlexible, shape, type, items_ptr);
+    return CreateFromMemory(shape, type, items_ptr, out);
   }

-  // A static factory method to create a Tensor from a given number.
-  // @param ptr output argument to hold the created Tensor
-  // @param item value
-  // @return Status Code
+  /// Create a 1D Tensor from a given list of values.
+  /// \tparam type of the values to be inserted.
+  /// \param[in] items elements of the tensor
+  /// \param[out] out output argument to hold the created Tensor
+  /// \return Status Code
   template <typename T>
-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const T &item) {
-    return CreateTensor<T>(ptr, {item}, TensorShape::CreateScalar());
+  static Status CreateFromVector(const std::vector<T> &items, TensorPtr *out) {
+    return CreateFromVector(items, TensorShape({static_cast<dsize_t>(items.size())}), out);
   }

-  // Create tensor from protobuf bytelist with uint8 or int8 types
-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
-                             const TensorShape &shape, const DataType &type, dsize_t pad_size);
+  /// Create a numeric scalar Tensor from the given value.
+  /// \tparam T type of value
+  /// \param[in] item value
+  /// \param[out] out Created tensor
+  /// \return Status code
+  template <typename T>
+  static Status CreateScalar(const T &item, TensorPtr *out) {
+    DataType type = DataType::FromCType<T>();
+    auto item_ptr = reinterpret_cast<const uchar *>(&item);
+    return CreateFromMemory(TensorShape::CreateScalar(), type, item_ptr, out);
+  }

-  static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::string &path);
+  /// Create a tensor from a binary file on disk.
+  /// \param[in] path file to be read
+  /// \param[out] out Created Tensor
+  /// \return Status code
+  static Status CreateFromFile(const std::string &path, TensorPtr *out);

   // Copy raw data of a array based on shape and strides to the destination pointer
   // @param dst Pointer to the destination array where the content is to be copied
   // @param src Pointer to the source of strided array to be copied
   // @param shape - shape of the source array
   // @param strides - strides of the source array
   // @param type_size - number of bytes needed to store one array element's type
   // @return Status Code
   static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
                                  std::vector<dsize_t> strides, uint8_t type_size);

-  // Release the memory using the allocator
+  /// Destruct the tensor and release the memory using the allocator
   virtual ~Tensor();

-  // compare the tensor shape and data
+  /// Equality operator. compares tensor shape, type and data
+  /// \param[in] rhs Tensor to be compared with
+  /// \return bool
   bool operator==(const Tensor &rhs) const;

   bool operator!=(const Tensor &rhs) const { return !((*this) == rhs); }

-  // Get item located at `index`, caller needs to provide the type.
-  // @tparam T
-  // @param index vector<dsize_t>
-  // @return return the item specified at index
+  /// Get item located at `index`, caller needs to provide the type.
+  /// \tparam T
+  /// \param[in] index vector<dsize_t>
+  /// \return return the item specified at index
   template <typename T>
   Status GetItemAt(T *o, const std::vector<dsize_t> &index) const;

-  // Get string located at `index`.
-  // @param index vector<dsize_t>
-  // @return return std::string_view specified at index
+  /// Get string located at `index`.
+  /// \param[in] index vector<dsize_t>
+  /// \return return std::string_view specified at index
   Status GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const;

   template <typename T>
@@ -225,22 +211,21 @@ class Tensor {
   template <typename T>
   Status GetFloatAt(T *o, const std::vector<dsize_t> &index) const;

-  // set item at location specified by index
-  // @tparam `T`
-  // @param index
-  // @param value of type `T`
+  /// set item at location specified by index
+  /// \tparam `T`
+  /// \param[in] index
+  /// \param[in] value of type `T`
   template <typename T>
   Status SetItemAt(const std::vector<dsize_t> &index, const T &value) {
+    RETURN_IF_NOT_OK(AllocateBuffer(SizeInBytes()));
     T *ptr = nullptr;
     RETURN_IF_NOT_OK(GetItemPtr<T>(&ptr, index));
     *ptr = value;
     return Status::OK();
   }

-  // set string item at location specified by index
-  // @param index
-  // @param value of type std::string
+  /// set string item at location specified by index
+  /// \param[in] index
+  /// \param[in] value of type std::string
   Status SetItemAt(const std::vector<dsize_t> &index, const std::string &value) {
     RETURN_UNEXPECTED_IF_NULL(data_);
     uchar *ptr = nullptr;
@@ -253,7 +238,8 @@ class Tensor {

     return Status::OK();
   }
-  // fill tensor with Zeros. Does not support strings.
+
+  /// fill tensor with Zeros. Does not support strings.
   Status Zero() {
     CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use Zero on tensor of strings..");
     dsize_t size = SizeInBytes();
@@ -262,13 +248,12 @@ class Tensor {
     return Status::OK();
   }

-  // Fill all elements in the Tensor with the given value of type `T`. Does not support strings.
-  // @tparam T
-  // @param value
+  /// Fill all elements in the Tensor with the given value of type `T`. Does not support strings.
+  /// \tparam T
+  /// \param value[in]
   template <typename T>
   Status Fill(const T &value) {
     CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use fill on tensor of strings.");
+    RETURN_IF_NOT_OK(AllocateBuffer(SizeInBytes()));
     int64_t cellSize = type_.SizeInBytes();
     if ((data_ != nullptr) && type_.IsCompatible<T>()) {
       for (dsize_t i = 0; i < Size(); i++) {
@ -283,91 +268,86 @@ class Tensor {
|
|||
}
|
||||
}
|
||||
|
||||
// Getter function for shape
|
||||
// @return
|
||||
/// Getter function for shape
|
||||
/// \return
|
||||
const TensorShape &shape() const { return shape_; }
|
||||
|
||||
/// Check if tensor has data
|
||||
/// \return bool - true if tensor is empty
|
||||
bool HasData() const;
|
||||
bool HasData() const { return data_ != nullptr; }
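
The empty-tensor checks added elsewhere in this commit hinge on this predicate. A sketch of the guard pattern (assuming, as the inline definition suggests, that a tensor with zero bytes never allocates data_):

std::shared_ptr<Tensor> t;
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({0}), DataType(DataType::DE_INT32), &t));
if (!t->HasData()) {
// skip any copy or serialization step that would dereference a null data_ pointer
}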

// Reshape the tensor. The given shape should have the same number of elements in the Tensor
// @param shape
/// Reshape the tensor. The given shape should have the same number of elements in the Tensor
/// \param shape
virtual Status Reshape(const TensorShape &shape);

// @return number of elements in this tensor
/// \return number of elements in this tensor
dsize_t Size() const { return shape().NumOfElements(); }

// @return the number of bytes this tensor needs
/// \return the number of bytes this tensor needs
dsize_t SizeInBytes() const {
if (data_end_ == nullptr) return type_.SizeInBytes() * shape_.NumOfElements();
return data_end_ - data_;
}

// @return the rank of the tensor
/// \return the rank of the tensor
dsize_t Rank() const { return shape().Rank(); }

// Get the starting memory address as a constant for the data of the tensor. This potentially
// drives an allocation if the data area is null.
// @return const unsigned char*
const unsigned char *GetBuffer() const;
/// Get the starting memory address as a constant for the data of the tensor. This potentially
/// drives an allocation if the data area is null.
/// \return const unsigned char*
const unsigned char *GetBuffer() const { return data_; }

// Skip the offsets and return the start of the buffer where the real strings are stored. Caller needs to check if
// the tensor's type is a string, otherwise an undefined address would be returned.
// @return address of the first string of the tensor.
uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; }

// Getter of the type
// @return
/// Getter of the type
/// \return
DataType type() const { return type_; }

// Provide stream operator for displaying it
// @param output stream
// @param so the Tensor object to be printed
// @return output stream
/// Provide stream operator for displaying it
/// \param output stream
/// \param so the Tensor object to be printed
/// \return output stream
friend std::ostream &operator<<(std::ostream &out, const Tensor &so) {
so.Print(out);
return out;
}

// Invalidate this Tensor by setting the type and shape to unknown and MData to null.
// Calling this method will make the Tensor and its data inaccessible, use it with caution.
/// Invalidate this Tensor by setting the type and shape to unknown and MData to null.
/// Calling this method will make the Tensor and its data inaccessible, use it with caution.
void Invalidate();

// Copy input tensor into self at the location index.
// Index is a vector of axes which can be incomplete:
// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell.
// @param index
// @param input
// @return Status code
/// Copy input tensor into self at the location index.
/// Index is a vector of axes which can be incomplete:
/// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell.
/// \param index
/// \param input
/// \return Status code
Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input);

// Find the address of the given index. Used in InsertTensor.
// Example:
// Tensor t= [[1,2],[3,4]] , StartAddrOfIndex({0}) -> &1
// @param index incomplete index
// @param output: startAddrofIndex
// @param output: remaining
// @return Status code
/// Find the address of the given index. Used in InsertTensor.
/// Example:
/// Tensor t= [[1,2],[3,4]] , StartAddrOfIndex({0}) -> &1
/// \param index incomplete index
/// \param output: startAddrofIndex
/// \param output: remaining
/// \return Status code
Status StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining);

// Expand the shape of the Tensor with one extra dimension.
// For example, if the shape is <512,512,3>:
// *- ExpandDim(0) gives: <1,512,512,3>
// *- ExpandDim(1) gives: <512,1,512,3>
// *- ExpandDim(3) gives: <512,512,3,1>
// @param axis location of the dim
/// Expand the shape of the Tensor with one extra dimension.
/// For example, if the shape is <512,512,3>:
/// *- ExpandDim(0) gives: <1,512,512,3>
/// *- ExpandDim(1) gives: <512,1,512,3>
/// *- ExpandDim(3) gives: <512,512,3,1>
/// \param axis location of the dim
virtual Status ExpandDim(const dsize_t &axis);

virtual void Squeeze();

// Calculates the strides of the Tensor
// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
// The strides will be {4,2,1}.
// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 bytes)
// The strides will be {16,8,4}.
// @return vector of integers
std::vector<dsize_t> Strides();
/// Calculates the strides of the Tensor
/// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
/// The strides will be {4,2,1}.
/// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 bytes)
/// The strides will be {16,8,4}.
/// \return vector of integers
std::vector<dsize_t> Strides() const;

std::string ToString() {
std::stringstream ss;

@@ -375,26 +355,26 @@ class Tensor {
return ss.str();
}

// Handle negative indices.
/// Handle negative indices.
static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; }

// Slice tensor based on the given indices. Copy the sliced data into out tensor. Only rank-1 tensors are supported.
// Based on the type of tensor, SliceNumeric or SliceString will be called
// @param out Tensor
// @param indices vector of indices
// @return Status error code
Status Slice(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices);
/// Slice tensor based on the given indices. Copy the sliced data into out tensor. Only rank-1 tensors are supported.
/// Based on the type of tensor, SliceNumeric or SliceString will be called
/// \param[out] out Tensor
/// \param[in] indices vector of indices
/// \return Status error code
Status Slice(TensorPtr *out, const std::vector<dsize_t> &indices);

// Slice numeric tensors.
Status SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices);
/// Slice numeric tensors.
Status SliceNumeric(TensorPtr *out, const std::vector<dsize_t> &indices);

// Slice string tensors
Status SliceString(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices);
/// Slice string tensors
Status SliceString(TensorPtr *out, const std::vector<dsize_t> &indices);
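
A short sketch of the rank-1 slicing API above; the exact selection semantics are those of SliceNumeric/SliceString, here assumed to pick the listed element positions:

std::shared_ptr<Tensor> t, sliced;
RETURN_IF_NOT_OK(Tensor::CreateFromVector(std::vector<int32_t>{10, 20, 30, 40}, &t));
RETURN_IF_NOT_OK(t->Slice(&sliced, {0, 2}));  // sliced would hold {10, 30}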

#ifdef ENABLE_PYTHON
// Constructs numpy array from input tensor
// @param data this data is the location of python data
// @return Status code
/// Constructs numpy array from input tensor
/// \param[in] data this data is the location of python data
/// \return Status code
Status GetDataAsNumpy(py::array *data);

Status GetDataAsNumpyStrings(py::array *data);

@@ -402,12 +382,12 @@ class Tensor {
static Status GetBufferInfo(Tensor *t, py::buffer_info *out);
#endif

// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor
/// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor
Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input);

// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor
// The order of elements follows the memory layout (i.e., row-major): [[1,2,3],[4,5,6]] --> 1,2,3,4,5,6
// @tparam T type of values in the Tensor Iterator
/// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor
/// The order of elements follows the memory layout (i.e., row-major): [[1,2,3],[4,5,6]] --> 1,2,3,4,5,6
/// \tparam T type of values in the Tensor Iterator
template <typename T, bool = true>
class TensorIterator {
public:

@@ -498,7 +478,7 @@ class Tensor {
};

// Specialization of TensorIterator for strings. It returns std::string_view for every item.
// @tparam DUMMY, used to be able to specialize the inner class
// \tparam DUMMY, used to be able to specialize the inner class
template <bool DUMMY>
class TensorIterator<std::string_view, DUMMY> {
public:

@@ -585,84 +565,192 @@ class Tensor {
const char *data_;
};

// Return a TensorIterator that points to the start of the Tensor.
// It's the user's responsibility to use the correct type that matches the Tensor type
// @param T The type of values in the Tensor
// @return TensorIterator
/// Return a TensorIterator that points to the start of the Tensor.
/// It's the user's responsibility to use the correct type that matches the Tensor type
/// \tparam T The type of values in the Tensor
/// \return TensorIterator
template <typename T>
TensorIterator<T> begin() {
AllocateBuffer(SizeInBytes());
return TensorIterator<T>(data_);
}

// Return a linear iterator that points to the place after the last element of the Tensor.
// @tparam T The type of values in the Tensor
// @return TensorIterator
/// Return a linear iterator that points to the place after the last element of the Tensor.
/// \tparam T The type of values in the Tensor
/// \return TensorIterator
template <typename T>
TensorIterator<T> end() {
return TensorIterator<T>(data_end_);
}
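
With the begin/end pair above a Tensor works in ordinary iterator loops; a minimal sketch over a numeric tensor (the caller picks the element type, per the note above):

std::shared_ptr<Tensor> t;
RETURN_IF_NOT_OK(Tensor::CreateFromVector(std::vector<uint8_t>{1, 2, 3}, &t));
uint32_t sum = 0;
for (auto it = t->begin<uint8_t>(); it != t->end<uint8_t>(); ++it) {
sum += *it;  // elements are visited in row-major order
}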

// Copies the last dimension at `index` from Tensor `src` to this Tensor.
// @param src Tensor
// @param index vector to the start of the dimension. The last dim should be 0
// @return Status
/// Copies the last dimension at `index` from Tensor `src` to this Tensor.
/// \param[in] src Tensor
/// \param[in] index vector to the start of the dimension. The last dim should be 0
/// \return Status
Status CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index);

protected:
// Get the starting memory address for the data of the tensor. This potentially
// drives an allocation if the data is null.
// @return unsigned char*
unsigned char *GetMutableBuffer();
/// Allocate memory for the tensor using the data_allocator
/// \param[in] length number of bytes to be allocated
/// \return Error Status
Status AllocateBuffer(const dsize_t &length);

// A function that prints Tensor recursively, first called by print
// @param out
// @param cur_dim
// @param cur_index
/// Get the starting memory address for the data of the tensor. This potentially
/// drives an allocation if the data is null.
/// \return unsigned char*
unsigned char *GetMutableBuffer() { return data_; }

/// A function that prints Tensor recursively, first called by print
/// \param[in] out
/// \param[in] cur_dim
/// \param[in] cur_index
void PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const;

// A function that prints info about the tensor
// @param out output stream
/// A function that prints info about the tensor
/// \param[out] out output stream
void Print(std::ostream &out) const;

// A function that prints the value as specified by its index
// @param index vector representing the index
// @param out
/// A function that prints the value as specified by its index
/// \param[in] index vector representing the index
/// \param[out] out
void PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const;

// Get pointer to item located at `index`, caller needs to provide the type.
// @tparam T
// @param index vector<dsize_t>
// @return return a pointer to the item specified at index of type `T`
/// Get pointer to item located at `index`, caller needs to provide the type.
/// \tparam T
/// \param[in] index vector<dsize_t>
/// \return return a pointer to the item specified at index of type `T`
template <typename T>
Status GetItemPtr(T **, const std::vector<dsize_t> &index) const;

// Get pointer to string located at `index` and the length of string
// @param index vector<dsize_t>
// @return return a pointer to the string specified at index and the length of the string
/// Get pointer to string located at `index` and the length of string
/// \param[in] index vector<dsize_t>
/// \return return a pointer to the string specified at index and the length of the string
Status GetItemPtr(uchar **, const std::vector<dsize_t> &index, offset_t *length = nullptr) const;

// Given a flat index of an item string, return the start and length of the item
// @param index flat index of the item
// @return start address of the string
// @return length of the string
/// Given a flat index of an item string, return the start and length of the item
/// \param[in] index flat index of the item
/// \param[out] start address of the string
/// \param[out] length of the string
Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const;

// all access to shape_ should be via shape
/// Skip the offsets and return the start of the buffer where the real strings are stored. Caller needs to check if
/// the tensor's type is a string, otherwise an undefined address would be returned.
/// \return address of the first string of the tensor.
uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; }

/// all access to shape_ should be via shape
TensorShape shape_;
// data type of tensor
/// data type of tensor
DataType type_;
// pointer to the start of the physical data
/// pointer to the start of the physical data
unsigned char *data_;
// An allocator for data_
/// An allocator for data_
CharAllocPtr data_allocator_;
// pointer to the end of the physical data
/// pointer to the end of the physical data
unsigned char *data_end_ = nullptr;

private:
/// Helper function to create a tensor from a Numpy array of strings
/// \param[in] arr Numpy array
/// \param[out] out Created Tensor
/// \return Status
static Status CreateFromNpString(py::array arr, TensorPtr *out);

/// Copy raw data of an array based on shape and strides to the destination pointer
/// \param[out] dst Pointer to the destination array where the content is to be copied
/// \param[in] src Pointer to the source of strided array to be copied
/// \param[in] shape shape of the source array
/// \param[in] strides strides of the source array
/// \param[in] type_size number of bytes needed to store one array element's type
/// \return Status Code
static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
std::vector<dsize_t> strides, uint8_t type_size);

/// const of the size of the offset variable
static constexpr uint8_t kOffsetSize = sizeof(offset_t);
};
template <>
inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() {
return TensorIterator<std::string_view>(data_, shape_.NumOfElements());
}

/// Create a Tensor from a given list of strings.
/// \note The memory layout of a Tensor of strings consists of the offset array followed by the strings.
/// The offset array will store one extra value to find the length of the last string.
/// OFFSET_1, OFFSET_2, ..., OFFSET_n+1, STRING_1, STRING_2, ..., STRING_n
/// The value of each offset is the start index of the corresponding string
/// Offsets are of type offset_t
/// strings will be null-terminated
/// example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
/// |----------------------------------------------------------------|
/// |            OFFSET ARRAY           |          STRINGS           |
/// | bytes 0-3 | bytes 4-7 | bytes 8-11 | bytes 12-15 | bytes 16-18 |
/// |    12     |    16     |     19     |    abc\0    |    de\0     |
/// |----------------------------------------------------------------|
/// \param[in] items elements of the tensor
/// \param[in] shape shape of the output tensor
/// \param[out] out output argument to hold the created Tensor
/// \return Status Code
template <>
inline Status Tensor::CreateFromVector<std::string>(const std::vector<std::string> &items, const TensorShape &shape,
TensorPtr *out) {
CHECK_FAIL_RETURN_UNEXPECTED(
items.size() == shape.NumOfElements(),
"Number of elements in the vector does not match the number of elements of the shape required");
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(items.size())}),
DataType(DataType::DE_STRING));
if (items.size() == 0) {
if (shape.known()) {
return (*out)->Reshape(shape);
}
}
auto length_sum = [](dsize_t sum, const std::string &s) { return s.length() + sum; };
dsize_t total_length = std::accumulate(items.begin(), items.end(), 0, length_sum);

// total bytes needed = offset array + strings
// offset array needs to store one offset var per element + 1 extra to get the length of the last string.
// strings will be null-terminated --> need 1 extra byte per element
dsize_t num_bytes = (kOffsetSize + 1) * (*out)->shape_.NumOfElements() + kOffsetSize + total_length;

(*out)->AllocateBuffer(num_bytes);
auto offset_arr = reinterpret_cast<offset_t *>((*out)->data_);
uchar *buf = (*out)->GetStringsBuffer();

offset_t offset = buf - (*out)->data_;  // the first string will start here
uint32_t i = 0;
for (const auto &str : items) {
// insert the start index of the string.
offset_arr[i++] = offset;
// total bytes are reduced by kOffsetSize
num_bytes -= kOffsetSize;
// insert actual string
int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
if (ret_code != 0) MS_LOG(ERROR) << "Cannot copy string into Tensor";
// next string will be stored right after the current one.
offset = offset + str.length() + 1;
// total bytes are reduced by the length of the string
num_bytes -= str.length() + 1;
}
// store one more offset value so we can get the length of the last string
// length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
offset_arr[i] = offset;

(*out)->data_end_ = (*out)->data_ + offset_arr[i];

MS_ASSERT(num_bytes == 0);
if (shape.known()) {
RETURN_IF_NOT_OK((*out)->Reshape(shape));
}
return Status::OK();
}
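
Tracing the byte accounting above for Tensor(['abc', 'de']): kOffsetSize is 4, there are 2 elements, and total_length is 5, so num_bytes = (4 + 1) * 2 + 4 + 5 = 19. The three stored offsets are 12 (start of "abc\0"), 16 (start of "de\0") and 19 (one past the last string), which is also where data_end_ ends up.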
/// Create a string scalar Tensor from the given value.
/// \param[in] item value
/// \param[out] out Created tensor
/// \return Status code
template <>
inline Status Tensor::CreateScalar<std::string>(const std::string &item, TensorPtr *out) {
return CreateFromVector<std::string>({item}, TensorShape::CreateScalar(), out);
}
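
Taken together these specializations, plus CreateEmpty and CreateFromMemory above, replace the old CreateTensor overloads. A minimal sketch of the common cases (local names are illustrative):

std::shared_ptr<Tensor> scalar, vec, empty;
RETURN_IF_NOT_OK(Tensor::CreateScalar<std::string>("hello", &scalar));
RETURN_IF_NOT_OK(Tensor::CreateFromVector(std::vector<std::string>{"abc", "de"}, TensorShape({2}), &vec));
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({2, 3}), DataType(DataType::DE_FLOAT32), &empty));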
}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_

@@ -141,8 +141,9 @@ Status BatchFetchRequest::RestoreOneTensor(const TensorMetaMsg *col_ts, const Re
#undef CASE

DataType type(dest);
std::shared_ptr<Tensor> ts =
std::make_shared<Tensor>(shape, type, static_cast<const unsigned char *>(data.GetPointer()), data.GetSize());
std::shared_ptr<Tensor> ts;
RETURN_IF_NOT_OK(
Tensor::CreateFromMemory(shape, type, static_cast<const unsigned char *>(data.GetPointer()), data.GetSize(), &ts));
// Next we restore the real data which can be embedded or stored separately.
if (ts->SizeInBytes() != data.GetSize()) {
MS_LOG(ERROR) << "Unexpected length. Read " << data.GetSize() << ". Expected " << ts->SizeInBytes() << ".\n"

@@ -176,12 +176,15 @@ Status BatchOp::BatchRows(const std::unique_ptr<TensorQTable> *src, const std::u

std::shared_ptr<Tensor> new_tensor;
if (first_type.IsNumeric()) {  // numeric tensor
RETURN_IF_NOT_OK(Tensor::CreateTensor(&new_tensor, TensorImpl::kFlexible, new_shape, first_type));
RETURN_IF_NOT_OK(Tensor::CreateEmpty(new_shape, first_type, &new_tensor));
dsize_t j = 0;
for (auto row : **src) {
std::shared_ptr<Tensor> old_tensor = row.at(i);  // row j, column i
if (old_tensor->shape() == first_shape) {  // check the newly popped rows have the same dim as the first
RETURN_IF_NOT_OK(new_tensor->InsertTensor({j++}, old_tensor));
if (new_shape.NumOfElements() != 0) {
RETURN_IF_NOT_OK(new_tensor->InsertTensor({j++}, old_tensor));
}
// Don't do anything if the tensor has no data
} else {
RETURN_STATUS_UNEXPECTED("[Batch ERROR] Inconsistent TensorShapes of Column " + std::to_string(i));
}

@@ -194,7 +197,7 @@ Status BatchOp::BatchRows(const std::unique_ptr<TensorQTable> *src, const std::u
strings.emplace_back(*itr);
}
}
RETURN_IF_NOT_OK(Tensor::CreateTensor(&new_tensor, strings, new_shape));
RETURN_IF_NOT_OK(Tensor::CreateFromVector(strings, new_shape, &new_tensor));
}
batched_row.emplace_back(new_tensor);
}
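
The new NumOfElements() guard is what makes batching of empty tensors safe: CreateEmpty still produces a well-formed batch tensor when a dimension is zero, but no InsertTensor copy is attempted. A hypothetical sketch of the case being handled:

std::shared_ptr<Tensor> batch;
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({2, 0}), DataType(DataType::DE_FLOAT32), &batch));
// batch->shape().NumOfElements() == 0, so the copy loop above is skipped entirely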

@@ -352,7 +355,7 @@ Status BatchOp::InvokeBatchMapFunc(TensorBatchTable *input, TensorBatchTable *ou
py::list output_list = py::cast<py::list>(ret_tuple[i]);
for (size_t j = 0; j < output_list.size(); j++) {
std::shared_ptr<Tensor> out;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, py::cast<py::array>(output_list[j])));
RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(py::cast<py::array>(output_list[j]), &out));
output_batch.push_back(std::move(out));
}
output->push_back(std::move(output_batch));

@@ -226,7 +226,8 @@ void CacheMergeOp::TensorRowRequest::WakeUpAny(TensorRow &&row) {
if (GetState() == State::kEmpty) {
// We will do a deep copy
for (auto &ts : row) {
auto out_ts = std::make_shared<Tensor>(ts->shape(), ts->type(), ts->GetBuffer(), ts->SizeInBytes());
std::shared_ptr<Tensor> out_ts;
Tensor::CreateFromTensor(ts, &out_ts);
cleaner_copy_.push_back(out_ts);
}
cleaner_copy_.setId(row.getId());

@@ -72,6 +72,7 @@ Status DeviceQueueOp::CheckExceptions(const std::unique_ptr<DataBuffer> &buffer)
buffer->GetRow(0, &row);
for (const auto &item : row) {
CHECK_FAIL_RETURN_UNEXPECTED(item->type().IsNumeric(), "Cannot send tensor of string type to device.");
CHECK_FAIL_RETURN_UNEXPECTED(item->HasData(), "Cannot send tensor with no data.");
}
}
return Status::OK();

@@ -359,7 +359,7 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, const std::pair<std::string,

Path path(folder_path_);
Path image_path = path / image_label.first;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, image_path.toString()));
RETURN_IF_NOT_OK(Tensor::CreateFromFile(image_path.toString(), &image));
if (decode_ == true) {
Status rc = Decode(image, &image);
if (rc.IsError()) {

@@ -369,9 +369,8 @@ Status CelebAOp::LoadTensorRow(row_id_type row_id, const std::pair<std::string,
}
}

RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(),
TensorShape({1, (uint32_t)image_label.second.size()}),
data_schema_->column(1).type()));
RETURN_IF_NOT_OK(
Tensor::CreateEmpty(TensorShape({1, (uint32_t)image_label.second.size()}), data_schema_->column(1).type(), &label));
RETURN_IF_NOT_OK(label->Zero());
for (uint32_t index = 0; index < image_label.second.size(); index++) {
if (image_label.second[index] == 1) {

@@ -190,15 +190,12 @@ Status CifarOp::LoadTensorRow(uint64_t index, TensorRow *trow) {
std::shared_ptr<Tensor> label;
std::shared_ptr<Tensor> fine_label;
std::shared_ptr<Tensor> ori_image = cifar_image_label_pairs_[index].first;
std::shared_ptr<Tensor> copy_image =
std::make_shared<Tensor>(ori_image->shape(), ori_image->type(), ori_image->GetBuffer());
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
data_schema_->column(1).type(),
reinterpret_cast<unsigned char *>(&cifar_image_label_pairs_[index].second[0])));
std::shared_ptr<Tensor> copy_image;
RETURN_IF_NOT_OK(Tensor::CreateFromTensor(ori_image, &copy_image));
RETURN_IF_NOT_OK(Tensor::CreateScalar(cifar_image_label_pairs_[index].second[0], &label));

if (cifar_image_label_pairs_[index].second.size() > 1) {
RETURN_IF_NOT_OK(Tensor::CreateTensor(
&fine_label, data_schema_->column(2).tensorImpl(), data_schema_->column(2).shape(),
data_schema_->column(2).type(), reinterpret_cast<unsigned char *>(&cifar_image_label_pairs_[index].second[1])));
RETURN_IF_NOT_OK(Tensor::CreateScalar(cifar_image_label_pairs_[index].second[1], &fine_label));
(*trow) = TensorRow(index, {copy_image, std::move(label), std::move(fine_label)});
} else {
(*trow) = TensorRow(index, {copy_image, std::move(label)});

@@ -359,9 +356,8 @@ Status CifarOp::ParseCifarData() {
}

std::shared_ptr<Tensor> image_tensor;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image_tensor, data_schema_->column(0).tensorImpl(),
TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}),
data_schema_->column(0).type()));
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({kCifarImageHeight, kCifarImageWidth, kCifarImageChannel}),
data_schema_->column(0).type(), &image_tensor));
auto itr = image_tensor->begin<uint8_t>();
uint32_t total_pix = kCifarImageHeight * kCifarImageWidth;
for (int pix = 0; pix < total_pix; ++pix) {

@@ -127,7 +127,7 @@ Status ClueOp::LoadTensor(const std::string &line, std::unique_ptr<TensorQTable>
(*tensor_table)->push_back(std::move(tRow));

std::shared_ptr<Tensor> tensor;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, {line}, TensorShape::CreateScalar()));
RETURN_IF_NOT_OK(Tensor::CreateScalar(line, &tensor));
(**tensor_table)[row][0] = std::move(tensor);
return Status::OK();
}

@@ -144,26 +144,19 @@ Status ClueOp::GetValue(const nlohmann::json &js, std::vector<std::string> key_c
std::string final_str = key_chain.back();
switch (cursor.type()) {
case nlohmann::detail::value_t::string:
RETURN_IF_NOT_OK(Tensor::CreateTensor(t, {cursor.get<std::string>()}, TensorShape::CreateScalar()));
RETURN_IF_NOT_OK(Tensor::CreateScalar(cursor.get<std::string>(), t));
break;

case nlohmann::detail::value_t::number_integer:
RETURN_IF_NOT_OK(
Tensor::CreateTensor(t, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_INT32)));
(*t)->SetItemAt<int32_t>({0}, cursor.get<int32_t>());
RETURN_IF_NOT_OK(Tensor::CreateScalar(cursor.get<int32_t>(), t));
break;
case nlohmann::detail::value_t::number_unsigned:
RETURN_IF_NOT_OK(
Tensor::CreateTensor(t, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_INT32)));
(*t)->SetItemAt<int32_t>({0}, cursor.get<uint32_t>());
RETURN_IF_NOT_OK(Tensor::CreateScalar(cursor.get<uint32_t>(), t));
break;
case nlohmann::detail::value_t::number_float:
RETURN_IF_NOT_OK(
Tensor::CreateTensor(t, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32)));
(*t)->SetItemAt<int32_t>({0}, cursor.get<float>());
RETURN_IF_NOT_OK(Tensor::CreateScalar(cursor.get<float>(), t));
break;
case nlohmann::detail::value_t::array:
RETURN_IF_NOT_OK(Tensor::CreateTensor(t, {cursor.get<std::vector<std::string>>()}, TensorShape::CreateScalar()));
RETURN_IF_NOT_OK(Tensor::CreateFromVector(cursor.get<std::vector<std::string>>(), t));
break;
default:
break;

@@ -239,9 +239,8 @@ Status CocoOp::LoadTensorRow(row_id_type row_id, const std::string &image_id, Te
}

std::vector<dsize_t> bbox_dim = {bbox_row_num, bbox_column_num};
RETURN_IF_NOT_OK(Tensor::CreateTensor(&coordinate, data_schema_->column(1).tensorImpl(), TensorShape(bbox_dim),
data_schema_->column(1).type(),
reinterpret_cast<unsigned char *>(&bbox_row[0])));
RETURN_IF_NOT_OK(Tensor::CreateFromVector(bbox_row, TensorShape(bbox_dim), &coordinate));

if (task_type_ == TaskType::Detection) {
RETURN_IF_NOT_OK(LoadDetectionTensorRow(row_id, image_id, image, coordinate, trow));
} else if (task_type_ == TaskType::Stuff || task_type_ == TaskType::Keypoint) {

@@ -278,13 +277,12 @@ Status CocoOp::LoadDetectionTensorRow(row_id_type row_id, const std::string &ima
iscrowd_row.push_back(annotation[i]);
}
}
RETURN_IF_NOT_OK(Tensor::CreateTensor(
&category_id, data_schema_->column(2).tensorImpl(), TensorShape({static_cast<dsize_t>(category_id_row.size()), 1}),
data_schema_->column(2).type(), reinterpret_cast<unsigned char *>(&category_id_row[0])));
RETURN_IF_NOT_OK(Tensor::CreateFromVector(
category_id_row, TensorShape({static_cast<dsize_t>(category_id_row.size()), 1}), &category_id));

RETURN_IF_NOT_OK(
Tensor::CreateFromVector(iscrowd_row, TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}), &iscrowd));

RETURN_IF_NOT_OK(Tensor::CreateTensor(
&iscrowd, data_schema_->column(3).tensorImpl(), TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}),
data_schema_->column(3).type(), reinterpret_cast<unsigned char *>(&iscrowd_row[0])));
(*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd)});
return Status::OK();
}

@@ -302,9 +300,8 @@ Status CocoOp::LoadSimpleTensorRow(row_id_type row_id, const std::string &image_

item_queue = itr_item->second;
std::vector<dsize_t> bbox_dim = {static_cast<dsize_t>(item_queue.size()), 1};
RETURN_IF_NOT_OK(Tensor::CreateTensor(&item, data_schema_->column(2).tensorImpl(), TensorShape(bbox_dim),
data_schema_->column(2).type(),
reinterpret_cast<unsigned char *>(&item_queue[0])));
RETURN_IF_NOT_OK(Tensor::CreateFromVector(item_queue, TensorShape(bbox_dim), &item));

(*trow) = TensorRow(row_id, {std::move(image), std::move(coordinate), std::move(item)});
return Status::OK();
}

@@ -334,18 +331,14 @@ Status CocoOp::LoadMixTensorRow(row_id_type row_id, const std::string &image_id,
area_row.push_back(annotation[i]);
}
}
RETURN_IF_NOT_OK(Tensor::CreateFromVector(
category_id_row, TensorShape({static_cast<dsize_t>(category_id_row.size()), 1}), &category_id));

RETURN_IF_NOT_OK(Tensor::CreateTensor(
&category_id, data_schema_->column(2).tensorImpl(), TensorShape({static_cast<dsize_t>(category_id_row.size()), 1}),
data_schema_->column(2).type(), reinterpret_cast<unsigned char *>(&category_id_row[0])));
RETURN_IF_NOT_OK(
Tensor::CreateFromVector(iscrowd_row, TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}), &iscrowd));

RETURN_IF_NOT_OK(Tensor::CreateTensor(
&iscrowd, data_schema_->column(3).tensorImpl(), TensorShape({static_cast<dsize_t>(iscrowd_row.size()), 1}),
data_schema_->column(3).type(), reinterpret_cast<unsigned char *>(&iscrowd_row[0])));
RETURN_IF_NOT_OK(Tensor::CreateFromVector(area_row, TensorShape({static_cast<dsize_t>(area_row.size()), 1}), &area));

RETURN_IF_NOT_OK(Tensor::CreateTensor(
&area, data_schema_->column(4).tensorImpl(), TensorShape({static_cast<dsize_t>(area_row.size()), 1}),
data_schema_->column(4).type(), reinterpret_cast<unsigned char *>(&area_row[0])));
(*trow) = TensorRow(
row_id, {std::move(image), std::move(coordinate), std::move(category_id), std::move(iscrowd), std::move(area)});
return Status::OK();

@@ -596,7 +589,7 @@ Status CocoOp::LaunchThreadsAndInitOp() {
}

Status CocoOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, path));
RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));

if (decode_ == true) {
Status rc = Decode(*tensor, tensor);

@@ -102,18 +102,13 @@ int CsvOp::CsvParser::put_record(char c) {
std::shared_ptr<Tensor> t;
switch (column_default_[cur_col_]->type) {
case CsvOp::INT:
Tensor::CreateTensor(&t, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_INT32));
t->SetItemAt<int32_t>({0}, std::stoi(s));
Tensor::CreateScalar(std::stoi(s), &t);
break;
case CsvOp::FLOAT:
Tensor::CreateTensor(&t, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32));
t->SetItemAt<float>({0}, std::stof(s));
break;
case CsvOp::STRING:
Tensor::CreateTensor(&t, {s}, TensorShape::CreateScalar());
Tensor::CreateScalar(std::stof(s), &t);
break;
default:
Tensor::CreateTensor(&t, {s}, TensorShape::CreateScalar());
Tensor::CreateScalar(s, &t);
break;
}
(*tensor_table_)[cur_row_][cur_col_] = std::move(t);
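
The same one-call CreateScalar pattern replaces the two-step CreateTensor-then-SetItemAt idiom throughout this commit; a before/after sketch (the element type is deduced from the C++ argument):

// before: allocate an uninitialized scalar, then poke the value in
// Tensor::CreateTensor(&t, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_INT32));
// t->SetItemAt<int32_t>({0}, 5);
// after: one call
std::shared_ptr<Tensor> t;
RETURN_IF_NOT_OK(Tensor::CreateScalar<int32_t>(5, &t));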
|
||||
|
|
|
@ -129,7 +129,7 @@ Status GeneratorOp::PyRowToTensorRow(py::object py_data, TensorRow *tensor_row)
|
|||
"Generator should return a tuple of numpy arrays.");
|
||||
}
|
||||
std::shared_ptr<Tensor> tensor;
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, ret_py_ele.cast<py::array>()));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(ret_py_ele.cast<py::array>(), &tensor));
|
||||
if ((!column_types_.empty()) && (column_types_[i] != DataType::DE_UNKNOWN) &&
|
||||
(column_types_[i] != tensor->type())) {
|
||||
return Status(StatusCode::kPyFuncException, __LINE__, __FILE__, "Generator type check failed.");
|
||||
|
|
|
@ -201,10 +201,8 @@ Status ImageFolderOp::WorkerEntry(int32_t worker_id) {
|
|||
// Load 1 TensorRow (image,label) using 1 ImageLabelPair. 1 function call produces 1 TensorTow in a DataBuffer
|
||||
Status ImageFolderOp::LoadTensorRow(row_id_type row_id, ImageLabelPair pairPtr, TensorRow *trow) {
|
||||
std::shared_ptr<Tensor> image, label;
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
|
||||
data_schema_->column(1).type(),
|
||||
reinterpret_cast<unsigned char *>(&pairPtr->second)));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, folder_path_ + (pairPtr->first)));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateScalar(pairPtr->second, &label));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromFile(folder_path_ + (pairPtr->first), &image));
|
||||
|
||||
if (decode_ == true) {
|
||||
Status rc = Decode(image, &image);
|
||||
|
|
|
@ -185,17 +185,14 @@ Status ManifestOp::LoadTensorRow(row_id_type row_id, const std::pair<std::string
|
|||
std::vector<int32_t> label_index(data.second.size());
|
||||
(void)std::transform(data.second.begin(), data.second.end(), label_index.begin(),
|
||||
[this](const std::string &label_name) { return label_index_[label_name]; });
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromVector(label_index, &label));
|
||||
if (label_index.size() == 1) {
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), TensorShape({}),
|
||||
data_schema_->column(1).type(),
|
||||
reinterpret_cast<unsigned char *>(&label_index[0])));
|
||||
label->Reshape(TensorShape({}));
|
||||
} else {
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(
|
||||
&label, data_schema_->column(1).tensorImpl(), TensorShape(std::vector<dsize_t>(1, label_index.size())),
|
||||
data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&label_index[0])));
|
||||
label->Reshape(TensorShape(std::vector<dsize_t>(1, label_index.size())));
|
||||
}
|
||||
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data.first));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromFile(data.first, &image));
|
||||
if (decode_ == true) {
|
||||
Status rc = Decode(image, &image);
|
||||
if (rc.IsError()) {
|
||||
|
|
|
@ -381,15 +381,15 @@ Status MindRecordOp::LoadTensorRow(TensorRow *tensor_row, const std::vector<uint
|
|||
auto num_elements = n_bytes / column_data_type_size;
|
||||
if (type == DataType::DE_STRING) {
|
||||
std::string s{data, data + n_bytes};
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, {s}, TensorShape::CreateScalar()));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateScalar(s, &tensor));
|
||||
} else if (column.hasShape()) {
|
||||
auto new_shape = TensorShape(column.shape());
|
||||
RETURN_IF_NOT_OK(column.MaterializeTensorShape(static_cast<int32_t>(num_elements), &new_shape));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, column.tensorImpl(), new_shape, type, data));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(new_shape, type, data, &tensor));
|
||||
} else {
|
||||
std::vector<dsize_t> shapeDetails = {static_cast<dsize_t>(num_elements)};
|
||||
auto new_shape = TensorShape(shapeDetails);
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, column.tensorImpl(), new_shape, type, data));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(new_shape, type, data, &tensor));
|
||||
}
|
||||
tensor_row->push_back(std::move(tensor));
|
||||
}
|
||||
|
|
|
@ -160,12 +160,10 @@ Status MnistOp::WorkerEntry(int32_t worker_id) {
|
|||
// Load 1 TensorRow (image,label) using 1 MnistLabelPair.
|
||||
Status MnistOp::LoadTensorRow(row_id_type row_id, const MnistLabelPair &mnist_pair, TensorRow *trow) {
|
||||
std::shared_ptr<Tensor> image, label;
|
||||
int32_t l = mnist_pair.second;
|
||||
// make a copy of cached tensor
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), mnist_pair.first->shape(),
|
||||
mnist_pair.first->type(), mnist_pair.first->GetBuffer()));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(1).tensorImpl(), data_schema_->column(1).shape(),
|
||||
data_schema_->column(1).type(), reinterpret_cast<unsigned char *>(&l)));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromTensor(mnist_pair.first, &image));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateScalar(mnist_pair.second, &label));
|
||||
|
||||
(*trow) = TensorRow(row_id, {std::move(image), std::move(label)});
|
||||
return Status::OK();
|
||||
}
|
||||
|
@ -325,8 +323,8 @@ Status MnistOp::ReadImageAndLabel(std::ifstream *image_reader, std::ifstream *la
|
|||
pixels[m] = (pixels[m] == 0) ? 0 : 255;
|
||||
}
|
||||
std::shared_ptr<Tensor> image;
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&image, data_schema_->column(0).tensorImpl(), img_tensor_shape,
|
||||
data_schema_->column(0).type(), reinterpret_cast<unsigned char *>(pixels)));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(img_tensor_shape, data_schema_->column(0).type(),
|
||||
reinterpret_cast<unsigned char *>(pixels), &image));
|
||||
image_label_pairs_.emplace_back(std::make_pair(image, labels_buf[j]));
|
||||
}
|
||||
return Status::OK();
|
||||
|
|
|
@ -40,7 +40,7 @@ namespace dataset {
|
|||
template <typename T>
|
||||
class Queue;
|
||||
|
||||
using MnistLabelPair = std::pair<std::shared_ptr<Tensor>, int32_t>;
|
||||
using MnistLabelPair = std::pair<std::shared_ptr<Tensor>, uint32_t>;
|
||||
|
||||
class MnistOp : public ParallelOp, public RandomAccessOp {
|
||||
public:
|
||||
|
|
|
@ -361,8 +361,7 @@ Status RandomDataOp::CreateRandomRow(int32_t worker_id, TensorRow *new_row) {
|
|||
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Failed to set random bytes for a tensor.");
|
||||
}
|
||||
|
||||
RETURN_IF_NOT_OK(
|
||||
Tensor::CreateTensor(&new_tensor, current_col.tensorImpl(), *new_shape, current_col.type(), buf.get()));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(*new_shape, current_col.type(), buf.get(), &new_tensor));
|
||||
|
||||
// Add this tensor to the tensor row for output
|
||||
(*new_row).push_back(std::move(new_tensor));
|
||||
|
|
|
@ -41,7 +41,7 @@ Status PythonSampler::GetNextSample(std::unique_ptr<DataBuffer> *out_buffer) {
|
|||
try {
|
||||
py::object py_ret = py_sampler_instance.attr("_get_indices")();
|
||||
py::array np_sample_ids = py_ret.cast<py::array>();
|
||||
Tensor::CreateTensor(&sample_ids, np_sample_ids); // copy numpy to tensor
|
||||
Tensor::CreateFromNpArray(np_sample_ids, &sample_ids); // copy numpy to tensor
|
||||
|
||||
if (HasChildSampler()) {
|
||||
for (auto it = sample_ids->begin<int64_t>(); it != sample_ids->end<int64_t>(); ++it) {
|
||||
|
|
|
@ -73,9 +73,7 @@ Status Sampler::CreateSamplerTensor(std::shared_ptr<Tensor> *sample_ids, int64_t
|
|||
col_desc_ = std::make_unique<ColDescriptor>("sampleIds", DataType(DataType::DE_INT64), TensorImpl::kFlexible, 1);
|
||||
}
|
||||
TensorShape shape(std::vector<dsize_t>(1, num_elements));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, col_desc_->tensorImpl(), shape, col_desc_->type()));
|
||||
RETURN_IF_NOT_OK(
|
||||
(*sample_ids)->AllocateBuffer((*sample_ids)->SizeInBytes())); // allocate memory in case user forgets!
|
||||
RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, col_desc_->type(), sample_ids));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
|
|
@ -146,7 +146,7 @@ Status TextFileOp::LoadTensor(const std::string &line, std::unique_ptr<TensorQTa
|
|||
(*tensor_table)->push_back(std::move(tRow));
|
||||
|
||||
std::shared_ptr<Tensor> tensor;
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&tensor, {line}, TensorShape::CreateScalar()));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateScalar(line, &tensor));
|
||||
(**tensor_table)[row][0] = std::move(tensor);
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -677,8 +677,7 @@ Status TFReaderOp::LoadFeature(const std::unique_ptr<TensorQTable> *tensor_table
|
|||
// into the tensor
|
||||
TensorShape current_shape = TensorShape::CreateUnknownRankShape();
|
||||
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(num_elements, ¤t_shape));
|
||||
RETURN_IF_NOT_OK(
|
||||
Tensor::CreateTensor(&ts, current_col.tensorImpl(), current_shape, current_col.type(), data_ptr));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(current_shape, current_col.type(), data_ptr, &ts));
|
||||
break;
|
||||
}
|
||||
case dataengine::Feature::KindCase::kInt64List: {
|
||||
|
@ -735,7 +734,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng
|
|||
if (current_col.type() == DataType::DE_STRING) {
|
||||
TensorShape shape = TensorShape::CreateScalar();
|
||||
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, &shape));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, bytes_list, shape));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, shape, tensor));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -763,7 +762,7 @@ Status TFReaderOp::LoadBytesList(const ColDescriptor ¤t_col, const dataeng
|
|||
// know how many elements there are and the total bytes, create tensor here:
|
||||
TensorShape current_shape = TensorShape::CreateScalar();
|
||||
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape((*num_elements) * pad_size, ¤t_shape));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, bytes_list, current_shape, current_col.type(), pad_size));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromByteList(bytes_list, current_shape, current_col.type(), pad_size, tensor));
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
@ -836,10 +835,7 @@ Status TFReaderOp::LoadIntList(const ColDescriptor ¤t_col, const dataengin
|
|||
// know how many elements there are, create tensor here:
|
||||
TensorShape current_shape = TensorShape::CreateUnknownRankShape();
|
||||
RETURN_IF_NOT_OK(current_col.MaterializeTensorShape(*num_elements, ¤t_shape));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, current_col.tensorImpl(), current_shape, current_col.type()));
|
||||
|
||||
// Tensors are lazily allocated, this eagerly allocates memory for the tensor.
|
||||
RETURN_IF_NOT_OK((*tensor)->AllocateBuffer((*tensor)->SizeInBytes()));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateEmpty(current_shape, current_col.type(), tensor));
|
||||
|
||||
int64_t i = 0;
|
||||
auto it = (*tensor)->begin<T>();
|
||||
|
|
|
@ -375,7 +375,7 @@ Status VOCOp::LaunchThreadsAndInitOp() {
|
|||
}
|
||||
|
||||
Status VOCOp::ReadImageToTensor(const std::string &path, const ColDescriptor &col, std::shared_ptr<Tensor> *tensor) {
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, path));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromFile(path, tensor));
|
||||
if (decode_ == true) {
|
||||
Status rc = Decode(*tensor, tensor);
|
||||
if (rc.IsError()) {
|
||||
|
@ -412,18 +412,10 @@ Status VOCOp::ReadAnnotationToTensor(const std::string &path, TensorRow *row) {
|
|||
bbox_num++;
|
||||
}
|
||||
}
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&bbox, data_schema_->column(1).tensorImpl(), TensorShape({bbox_num, 4}),
|
||||
data_schema_->column(1).type(),
|
||||
reinterpret_cast<unsigned char *>(&bbox_data[0])));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&label, data_schema_->column(2).tensorImpl(), TensorShape({bbox_num, 1}),
|
||||
data_schema_->column(2).type(),
|
||||
reinterpret_cast<unsigned char *>(&label_data[0])));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&difficult, data_schema_->column(3).tensorImpl(), TensorShape({bbox_num, 1}),
|
||||
data_schema_->column(3).type(),
|
||||
reinterpret_cast<unsigned char *>(&difficult_data[0])));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&truncate, data_schema_->column(4).tensorImpl(), TensorShape({bbox_num, 1}),
|
||||
data_schema_->column(4).type(),
|
||||
reinterpret_cast<unsigned char *>(&truncate_data[0])));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromVector(bbox_data, TensorShape({bbox_num, 4}), &bbox));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromVector(label_data, TensorShape({bbox_num, 1}), &label));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromVector(difficult_data, TensorShape({bbox_num, 1}), &difficult));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromVector(truncate_data, TensorShape({bbox_num, 1}), &truncate));
|
||||
(*row) = TensorRow({std::move(bbox), std::move(label), std::move(difficult), std::move(truncate)});
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -57,8 +57,7 @@ Status Graph::CreateTensorByVector(const std::vector<std::vector<T>> &data, Data
|
|||
std::shared_ptr<Tensor> tensor;
|
||||
size_t m = data.size();
|
||||
size_t n = data[0].size();
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(
|
||||
&tensor, TensorImpl::kFlexible, TensorShape({static_cast<dsize_t>(m), static_cast<dsize_t>(n)}), type, nullptr));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({static_cast<dsize_t>(m), static_cast<dsize_t>(n)}), type, &tensor));
|
||||
auto ptr = tensor->begin<T>();
|
||||
for (const auto &id_m : data) {
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(id_m.size() == n, "Each member of the vector has a different size");
|
||||
|
@ -310,8 +309,7 @@ Status Graph::GetNodeFeature(const std::shared_ptr<Tensor> &nodes, const std::ve
|
|||
dsize_t size = std::accumulate(shape_vec.begin(), shape_vec.end(), 1, std::multiplies<dsize_t>());
|
||||
shape = shape.PrependDim(size);
|
||||
std::shared_ptr<Tensor> fea_tensor;
|
||||
RETURN_IF_NOT_OK(
|
||||
Tensor::CreateTensor(&fea_tensor, TensorImpl::kFlexible, shape, default_feature->Value()->type(), nullptr));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, default_feature->Value()->type(), &fea_tensor));
|
||||
|
||||
dsize_t index = 0;
|
||||
for (auto node_itr = nodes->begin<NodeIdType>(); node_itr != nodes->end<NodeIdType>(); ++node_itr) {
|
||||
|
@ -358,8 +356,7 @@ Status Graph::GetEdgeFeature(const std::shared_ptr<Tensor> &edges, const std::ve
|
|||
dsize_t size = std::accumulate(shape_vec.begin(), shape_vec.end(), 1, std::multiplies<dsize_t>());
|
||||
shape = shape.PrependDim(size);
|
||||
std::shared_ptr<Tensor> fea_tensor;
|
||||
RETURN_IF_NOT_OK(
|
||||
Tensor::CreateTensor(&fea_tensor, TensorImpl::kFlexible, shape, default_feature->Value()->type(), nullptr));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateEmpty(shape, default_feature->Value()->type(), &fea_tensor));
|
||||
|
||||
dsize_t index = 0;
|
||||
for (auto edge_itr = edges->begin<EdgeIdType>(); edge_itr != edges->end<EdgeIdType>(); ++edge_itr) {
|
||||
|
|
|
@ -125,7 +125,7 @@ Status GraphLoader::LoadNode(const std::vector<uint8_t> &col_blob, const mindrec
|
|||
(*feature_map)[node_type].insert(ind);
|
||||
if ((*default_feature)[ind] == nullptr) {
|
||||
std::shared_ptr<Tensor> zero_tensor;
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&zero_tensor, TensorImpl::kFlexible, tensor->shape(), tensor->type()));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateEmpty(tensor->shape(), tensor->type(), &zero_tensor));
|
||||
RETURN_IF_NOT_OK(zero_tensor->Zero());
|
||||
(*default_feature)[ind] = std::make_shared<Feature>(ind, zero_tensor);
|
||||
}
|
||||
|
@ -151,7 +151,7 @@ Status GraphLoader::LoadEdge(const std::vector<uint8_t> &col_blob, const mindrec
|
|||
(*feature_map)[edge_type].insert(ind);
|
||||
if ((*default_feature)[ind] == nullptr) {
|
||||
std::shared_ptr<Tensor> zero_tensor;
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(&zero_tensor, TensorImpl::kFlexible, tensor->shape(), tensor->type()));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateEmpty(tensor->shape(), tensor->type(), &zero_tensor));
|
||||
RETURN_IF_NOT_OK(zero_tensor->Zero());
|
||||
(*default_feature)[ind] = std::make_shared<Feature>(ind, zero_tensor);
|
||||
}
|
||||
|
@ -170,9 +170,9 @@ Status GraphLoader::LoadFeatureTensor(const std::string &key, const std::vector<
|
|||
key, col_blob, col_jsn, &data, &data_ptr, &n_bytes, &col_type, &col_type_size, &column_shape);
|
||||
CHECK_FAIL_RETURN_UNEXPECTED(rs == mindrecord::SUCCESS, "fail to load column" + key);
|
||||
if (data == nullptr) data = reinterpret_cast<const unsigned char *>(&data_ptr[0]);
|
||||
RETURN_IF_NOT_OK(Tensor::CreateTensor(tensor, TensorImpl::kFlexible,
|
||||
std::move(TensorShape({static_cast<dsize_t>(n_bytes / col_type_size)})),
|
||||
std::move(DataType(mindrecord::ColumnDataTypeNameNormalized[col_type])), data));
|
||||
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(std::move(TensorShape({static_cast<dsize_t>(n_bytes / col_type_size)})),
|
||||
std::move(DataType(mindrecord::ColumnDataTypeNameNormalized[col_type])),
|
||||
data, tensor));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
|
|
@ -33,6 +33,7 @@
|
|||
#include "pybind11/stl.h"
|
||||
#endif
|
||||
|
||||
#include "common/utils.h"
|
||||
#include "minddata/dataset/core/constants.h"
|
||||
#include "minddata/dataset/core/data_type.h"
|
||||
#include "minddata/dataset/core/tensor_shape.h"
|
||||
|
@ -50,170 +51,155 @@ class Allocator;
|
|||
|
||||
using CharAllocPtr = std::unique_ptr<Allocator<unsigned char>>;
|
||||
using TensorAllocPtr = std::shared_ptr<Allocator<Tensor>>; // An allocator shared_ptr for Tensors
|
||||
using offset_t = uint32_t; // type of offset values to store strings locations
|
||||
using TensorPtr = std::shared_ptr<Tensor>;
|
||||
|
||||
class Tensor {
|
||||
public:
|
||||
Tensor() = delete;
|
||||
|
||||
// Create a new tensor, does not internally allocate storage. This constructor is protected, use CreateTensor.
|
||||
// @note The shape and type information should be known and valid.
|
||||
// @param shape TensorShape
|
||||
// @param type DataType
|
||||
Tensor(const TensorShape &shape, const DataType &type);
|
||||
|
||||
// Create a new tensor, allocates storage and copies in data. This constructor is protected, use CreateTensor.
|
||||
// @note The buffer should be valid and the shape and type information should be known and valid.
|
||||
// @param shape TensorShape
|
||||
// @param type DataType
|
||||
// @param data unsigned char*, pointer to the data.
|
||||
Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data);
|
||||
|
||||
Tensor(const TensorShape &shape, const DataType &type, const unsigned char *data, const dsize_t &length);
|
||||
|
||||
Tensor(const Tensor &other) = delete;
|
||||
|
||||
Tensor &operator=(const Tensor &other) = delete;
|
||||
|
||||
/// Create a tensor using shape and type. This constructor should not be used directly, use CreateFromTensor instead
|
||||
/// \note The shape and type information should be known and valid
|
||||
/// \note The constructor does not allocate data
|
||||
/// \param shape TensorShape
|
||||
/// \param type DataType
|
||||
Tensor(const TensorShape &shape, const DataType &type);
|
||||
|
||||
/// Move constructor
|
||||
/// \param other Tensor to be moved
|
||||
Tensor(Tensor &&other) noexcept;
|
||||
|
||||
/// Move assigment operator
|
||||
/// \param other Tensor to be moved
|
||||
Tensor &operator=(Tensor &&other) noexcept;
|
||||
|
||||
Status AllocateBuffer(const dsize_t &length);
|
||||
/// Create a numeric tensor with type and shape. Items of the tensor would be uninitialized.
/// \param[in] shape shape of the output tensor
/// \param[in] type type of the output tensor
/// \param[out] out Generated tensor
/// \return Status code
static Status CreateEmpty(const TensorShape &shape, const DataType &type, TensorPtr *out);

// type of offset values used to store string information
using offset_t = uint32_t;
// const of the size of the offset variable
static constexpr uint8_t kOffsetSize = sizeof(offset_t);
// Tensor base class which holds the data in an unsigned char* buffer.
/// Create a numeric tensor from a pointer in memory. Length of the source data is determined from the shape and type.
/// Data will be copied into the newly created tensor.
/// \param[in] shape shape of the output tensor
/// \param[in] type type of the output tensor
/// \param[in] src pointer to the source data
/// \param[out] out Generated tensor
/// \return Status code
static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src, TensorPtr *out);

// Construct a scalar string Tensor
explicit Tensor(const std::string &str) : Tensor(std::vector<std::string>{str}, TensorShape::CreateScalar()) {}
/// Create a tensor from a pointer in memory and length. Data will be copied into the newly created tensor.
/// \param[in] shape shape of the output tensor
/// \param[in] type type of the output tensor
/// \param[in] src pointer to the source data
/// \param[in] length length of the src data
/// \param[out] out Generated tensor
/// \return Status code
static Status CreateFromMemory(const TensorShape &shape, const DataType &type, const uchar *src,
const dsize_t &length, TensorPtr *out);
// Construct a tensor from a list of strings. Reshape the tensor with `shape` if given, otherwise assume the shape is
// the size of the vector `strings`.
// The memory layout of a Tensor of strings consists of the Offset_array followed by the strings.
// The offset array will store one extra value to find the length of the last string.
// OFFSET1, OFFSET2, ..., OFFSETn+1, STRING1, STRING2, ..., STRINGn
// The value of each offset is the start index of the corresponding string
// Offsets are of type offset_t
// strings will be null-terminated
// example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
// |----------------------------------------------------------------|
// |          OFFSET ARRAY          |         STRINGS               |
// | bytes 0-3 | bytes 4-7 | bytes 8-11 | bytes 12-15 | bytes 16-18 |
// |    12     |    16     |     19     |    abc\0    |     de\0    |
// |----------------------------------------------------------------|
explicit Tensor(const std::vector<std::string> &strings,
const TensorShape &shape = TensorShape::CreateUnknownRankShape());

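// Worked example of the layout above (illustrative): with offset_t being uint32_t (kOffsetSize == 4),
// Tensor({"abc", "de"}) stores n + 1 = 3 offsets occupying bytes 0-11, so the first string starts
// at byte 12. "abc\0" occupies bytes 12-15 and "de\0" bytes 16-18, giving the offsets {12, 16, 19};
// the length of the last string is recovered as 19 - 16 - 1 = 2.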
// Same as Tensor(vector<string>) but the input is protobuf bytelist
explicit Tensor(const dataengine::BytesList &bytes_list,
const TensorShape &shape = TensorShape::CreateUnknownRankShape());

// A static factory method to create the given flavour of derived Tensor
// Returns the base class reference for the Tensor.
// @param ptr output argument to hold the created Tensor of given tensor_impl
// @param tensor_impl - which implementation of Tensor
// @param shape - shape of the tensor
// @param type - datatype of the tensor
// @param data - data to be copied to Tensor new allocation
// @return Status Code
static Status CreateTensor(std::shared_ptr<Tensor> *, TensorImpl tensor_impl, const TensorShape &shape, DataType type,
const unsigned char *data = nullptr);

// Create a copy of the input tensor
// @param out [out] output tensor to be generated
// @param in [in] original tensor to be copied
// @return Status
static Status CreateTensor(std::shared_ptr<Tensor> *out, const std::shared_ptr<Tensor> &in) {
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes());
return Status::OK();
/// Create a copy of the input tensor
/// \param[in] in original tensor to be copied
/// \param[out] out output tensor to be generated
/// \return Status
static Status CreateFromTensor(const TensorPtr &in, TensorPtr *out) {
return CreateFromMemory(in->shape(), in->type(), in->GetBuffer(), in->SizeInBytes(), out);
}
#ifdef ENABLE_PYTHON
// A static factory method to create a Tensor from a given py::array.
// @param ptr output argument to hold the created Tensor
// @param arr py::array
// @return Status Code
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, py::array arr);

// Helper function to create a tensor from a Numpy array of strings
static Status CreateTensorFromNumpyString(std::shared_ptr<Tensor> *ptr, py::array arr);
/// Create a Tensor from a given py::array
/// \param[in] arr py::array
/// \param[out] out Created tensor
/// \return Status Code
static Status CreateFromNpArray(const py::array &arr, TensorPtr *out);
#endif

// A static factory method to create a Tensor from a given list of strings.
// @param ptr output argument to hold the created Tensor
// @param strings elements of the tensor
// @param shape shape of the tensor
// @return Status Code
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<std::string> &strings,
const TensorShape &shape = TensorShape::CreateUnknownRankShape());
/// Create a tensor of type DE_STRING from a BytesList.
/// \param[in] bytes_list protobuf's Bytelist
/// \param[in] shape shape of the output tensor
/// \param[out] out created Tensor
/// \return Status Code
static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape, TensorPtr *out);

// create tensor from protobuf bytelist with strings
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
const TensorShape &shape);
/// Create a tensor of type UINT8 or INT8 from a BytesList.
/// The tensor will be padded with ' ' to reach the required pad_size.
/// \param[in] bytes_list protobuf's Bytelist
/// \param[in] shape shape of the output tensor
/// \param[in] type type of created tensor. Should be DE_UINT8 or INT8
/// \param[in] pad_size The size of the tensor after padding
/// \param[out] out created Tensor
/// \return Status Code
static Status CreateFromByteList(const dataengine::BytesList &bytes_list, const TensorShape &shape,
const DataType &type, dsize_t pad_size, TensorPtr *out);
// A static factory method to create a Tensor from a given list of numbers.
// @param ptr output argument to hold the created Tensor
// @param items elements of the tensor
// @param shape shape of the tensor
// @return Status Code
/// Create a Tensor from a given list of values.
/// \tparam T type of the values to be inserted.
/// \param[in] items elements of the tensor
/// \param[in] shape shape of the output tensor
/// \param[out] out output argument to hold the created Tensor
/// \return Status Code
template <typename T>
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::vector<T> &items,
const TensorShape &shape_req = TensorShape::CreateUnknownRankShape()) {
static Status CreateFromVector(const std::vector<T> &items, const TensorShape &shape, TensorPtr *out) {
CHECK_FAIL_RETURN_UNEXPECTED(
items.size() == shape.NumOfElements(),
"Number of elements in the vector does not match the number of elements of the shape required");
DataType type = DataType::FromCType<T>();
// if items is empty, items_ptr would be nullptr. CreateFromMemory will handle this case.
auto items_ptr = reinterpret_cast<const uchar *>(&items[0]);
TensorShape shape = shape_req;
if (!shape.known()) {
shape = TensorShape({static_cast<dsize_t>(items.size())});
}
return CreateTensor(ptr, TensorImpl::kFlexible, shape, type, items_ptr);
return CreateFromMemory(shape, type, items_ptr, out);
}

// A static factory method to create a Tensor from a given number.
// @param ptr output argument to hold the created Tensor
// @param item value
// @return Status Code
/// Create a 1D Tensor from a given list of values.
/// \tparam T type of the values to be inserted.
/// \param[in] items elements of the tensor
/// \param[out] out output argument to hold the created Tensor
/// \return Status Code
template <typename T>
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const T &item) {
return CreateTensor<T>(ptr, {item}, TensorShape::CreateScalar());
static Status CreateFromVector(const std::vector<T> &items, TensorPtr *out) {
return CreateFromVector(items, TensorShape({static_cast<dsize_t>(items.size())}), out);
}

// Create tensor from protobuf bytelist with uint8 or int8 types
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const dataengine::BytesList &bytes_list,
const TensorShape &shape, const DataType &type, dsize_t pad_size);
/// Create a numeric scalar Tensor from the given value.
/// \tparam T type of value
/// \param[in] item value
/// \param[out] out Created tensor
/// \return Status code
template <typename T>
static Status CreateScalar(const T &item, TensorPtr *out) {
DataType type = DataType::FromCType<T>();
auto item_ptr = reinterpret_cast<const uchar *>(&item);
return CreateFromMemory(TensorShape::CreateScalar(), type, item_ptr, out);
}
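/// Illustrative usage of the factory methods above (a sketch, not part of the original API docs):
/// \code
/// std::shared_ptr<Tensor> t;
/// RETURN_IF_NOT_OK(Tensor::CreateFromVector<float>({1.0f, 2.0f, 3.0f, 4.0f}, TensorShape({2, 2}), &t));
/// RETURN_IF_NOT_OK(Tensor::CreateScalar<int32_t>(42, &t));
/// RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_FLOAT32), &t));
/// \endcode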
static Status CreateTensor(std::shared_ptr<Tensor> *ptr, const std::string &path);
/// Create a tensor from a binary file on disk.
/// \param[in] path file to be read
/// \param[out] out Created Tensor
/// \return Status code
static Status CreateFromFile(const std::string &path, TensorPtr *out);

// Copy raw data of an array based on shape and strides to the destination pointer
// @param dst Pointer to the destination array where the content is to be copied
// @param src Pointer to the source of strided array to be copied
// @param shape - shape of the source array
// @param strides - strides of the source array
// @param type_size - number of bytes needed to store one array element's type
// @return Status Code
static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
std::vector<dsize_t> strides, uint8_t type_size);

// Release the memory using the allocator
/// Destruct the tensor and release the memory using the allocator
virtual ~Tensor();

// compare the tensor shape and data
/// Equality operator. Compares tensor shape, type and data
/// \param[in] rhs Tensor to be compared with
/// \return bool
bool operator==(const Tensor &rhs) const;

bool operator!=(const Tensor &rhs) const { return !((*this) == rhs); }

// Get item located at `index`, caller needs to provide the type.
// @tparam T
// @param index vector<dsize_t>
// @return return the item specified at index
/// Get item located at `index`, caller needs to provide the type.
/// \tparam T
/// \param[in] index vector<dsize_t>
/// \return return the item specified at index
template <typename T>
Status GetItemAt(T *o, const std::vector<dsize_t> &index) const;

// Get string located at `index`.
// @param index vector<dsize_t>
// @return return std::string_view specified at index
/// Get string located at `index`.
/// \param[in] index vector<dsize_t>
/// \return return std::string_view specified at index
Status GetItemAt(std::string_view *o, const std::vector<dsize_t> &index) const;

template <typename T>
@@ -225,22 +211,21 @@ class Tensor {
template <typename T>
Status GetFloatAt(T *o, const std::vector<dsize_t> &index) const;

// set item at location specified by index
// @tparam `T`
// @param index
// @param value of type `T`
/// set item at location specified by index
/// \tparam `T`
/// \param[in] index
/// \param[in] value of type `T`
template <typename T>
Status SetItemAt(const std::vector<dsize_t> &index, const T &value) {
RETURN_IF_NOT_OK(AllocateBuffer(SizeInBytes()));
T *ptr = nullptr;
RETURN_IF_NOT_OK(GetItemPtr<T>(&ptr, index));
*ptr = value;
return Status::OK();
}
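/// Illustrative usage (a sketch): for a 2x2 DE_FLOAT32 tensor `t`,
/// \code
/// RETURN_IF_NOT_OK(t->SetItemAt<float>({0, 1}, 0.5f));
/// \endcode
/// writes 0.5 into row 0, column 1; the index vector must match the tensor's rank.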
// set string item at location specified by index
// @param index
// @param value of type std::string
/// set string item at location specified by index
/// \param[in] index
/// \param[in] value of type std::string
Status SetItemAt(const std::vector<dsize_t> &index, const std::string &value) {
RETURN_UNEXPECTED_IF_NULL(data_);
uchar *ptr = nullptr;
@@ -253,7 +238,8 @@ class Tensor {
return Status::OK();
}
// fill tensor with Zeros. Does not support strings.

/// fill tensor with Zeros. Does not support strings.
Status Zero() {
CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use Zero on tensor of strings.");
dsize_t size = SizeInBytes();
@@ -262,13 +248,12 @@ class Tensor {
return Status::OK();
}

// Fill all elements in the Tensor with the given value of type `T`. Does not support strings.
// @tparam T
// @param value
/// Fill all elements in the Tensor with the given value of type `T`. Does not support strings.
/// \tparam T
/// \param[in] value
template <typename T>
Status Fill(const T &value) {
CHECK_FAIL_RETURN_UNEXPECTED(type_ != DataType::DE_STRING, "Cannot use fill on tensor of strings.");
RETURN_IF_NOT_OK(AllocateBuffer(SizeInBytes()));
int64_t cellSize = type_.SizeInBytes();
if ((data_ != nullptr) && type_.IsCompatible<T>()) {
for (dsize_t i = 0; i < Size(); i++) {
@@ -283,91 +268,86 @@ class Tensor {
}
}

// Getter function for shape
// @return
/// Getter function for shape
/// \return
const TensorShape &shape() const { return shape_; }

/// Check if tensor has data
/// \return bool - true if the tensor has data
bool HasData() const;
bool HasData() const { return data_ != nullptr; }

// Reshape the tensor. The given shape should have the same number of elements as the Tensor
// @param shape
/// Reshape the tensor. The given shape should have the same number of elements as the Tensor
/// \param shape
virtual Status Reshape(const TensorShape &shape);

// @return number of elements in this tensor
/// \return number of elements in this tensor
dsize_t Size() const { return shape().NumOfElements(); }

// @return the number of bytes this tensor needs
/// \return the number of bytes this tensor needs
dsize_t SizeInBytes() const {
if (data_end_ == nullptr) return type_.SizeInBytes() * shape_.NumOfElements();
return data_end_ - data_;
}

// @return the rank of the tensor
/// \return the rank of the tensor
dsize_t Rank() const { return shape().Rank(); }

// Get the starting memory address as a constant for the data of the tensor. This potentially
// drives an allocation if the data area is null.
// @return const unsigned char*
const unsigned char *GetBuffer() const;
/// Get the starting memory address as a constant for the data of the tensor. This potentially
/// drives an allocation if the data area is null.
/// \return const unsigned char*
const unsigned char *GetBuffer() const { return data_; }

// Skip the offsets and return the start of the buffer where the real strings are stored. The caller needs to check
// that the tensor's type is string, otherwise an undefined address would be returned.
// @return address of the first string of the tensor.
uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; }

// Getter of the type
// @return
/// Getter of the type
/// \return
DataType type() const { return type_; }
// Provide stream operator for displaying it
// @param output stream
// @param so the Tensor object to be printed
// @return output stream
/// Provide stream operator for displaying it
/// \param output stream
/// \param so the Tensor object to be printed
/// \return output stream
friend std::ostream &operator<<(std::ostream &out, const Tensor &so) {
so.Print(out);
return out;
}

// Invalidate this Tensor by setting the type and shape to unknown and MData to null.
// Calling this method will make the Tensor and its data inaccessible, use it with caution.
/// Invalidate this Tensor by setting the type and shape to unknown and MData to null.
/// Calling this method will make the Tensor and its data inaccessible, use it with caution.
void Invalidate();

// Copy input tensor into self at the location index.
// Index is a vector of axes which can be incomplete:
// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell.
// @param index
// @param input
// @return Status code
/// Copy input tensor into self at the location index.
/// Index is a vector of axes which can be incomplete:
/// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell.
/// \param index
/// \param input
/// \return Status code
Status InsertTensor(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input);

// Find the address of the given index. Used in InsertTensor.
// Example:
// Tensor t = [[1,2],[3,4]], StartAddrOfIndex({0}) -> &1
// @param index incomplete index
// @param output: startAddrofIndex
// @param output: remaining
// @return Status code
/// Find the address of the given index. Used in InsertTensor.
/// Example:
/// Tensor t = [[1,2],[3,4]], StartAddrOfIndex({0}) -> &1
/// \param index incomplete index
/// \param output: startAddrofIndex
/// \param output: remaining
/// \return Status code
Status StartAddrOfIndex(std::vector<dsize_t> ind, uchar **start_addr_of_index, TensorShape *remaining);
// Expand the shape of the Tensor with one extra dimension.
// For example, if the shape is <512,512,3>:
// *- ExpandDim(0) gives: <1,512,512,3>
// *- ExpandDim(1) gives: <512,1,512,3>
// *- ExpandDim(3) gives: <512,512,3,1>
// @param axis location of the dim
/// Expand the shape of the Tensor with one extra dimension.
/// For example, if the shape is <512,512,3>:
/// *- ExpandDim(0) gives: <1,512,512,3>
/// *- ExpandDim(1) gives: <512,1,512,3>
/// *- ExpandDim(3) gives: <512,512,3,1>
/// \param axis location of the dim
virtual Status ExpandDim(const dsize_t &axis);

virtual void Squeeze();

// Calculates the strides of the Tensor
// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
// The strides will be {4,2,1}.
// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 bytes)
// The strides will be {16,8,4}.
// @return vector of integers
std::vector<dsize_t> Strides();
/// Calculates the strides of the Tensor
/// Ex: Tensor of shape <4,2,2> and type DE_UINT8 (1 byte)
/// The strides will be {4,2,1}.
/// Ex: Tensor of shape <4,2,2> and type DE_UINT32 (4 bytes)
/// The strides will be {16,8,4}.
/// \return vector of integers
std::vector<dsize_t> Strides() const;
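/// Illustrative computation (a sketch of the row-major rule above): the element strides of shape
/// <4,2,2> are the suffix products {2 * 2, 2, 1} = {4, 2, 1}; scaling by the element size gives
/// the byte strides, e.g. {16, 8, 4} for DE_UINT32 (4 bytes per element).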
std::string ToString() {
std::stringstream ss;
@@ -375,26 +355,26 @@ class Tensor {
return ss.str();
}
// Handle negative indices.
/// Handle negative indices.
static inline dsize_t HandleNeg(dsize_t index, dsize_t length) { return (index < 0) ? (index + length) : index; }

// Slice tensor based on the given indices. Copy the sliced data into the out tensor. Only rank-1 tensors are supported.
// Based on the type of tensor, SliceNumeric or SliceString will be called.
// @param out Tensor
// @param indices vector of indices
// @return Status error code
Status Slice(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices);
/// Slice tensor based on the given indices. Copy the sliced data into the out tensor. Only rank-1 tensors are supported.
/// Based on the type of tensor, SliceNumeric or SliceString will be called.
/// \param[out] out Tensor
/// \param[in] indices vector of indices
/// \return Status error code
Status Slice(TensorPtr *out, const std::vector<dsize_t> &indices);

// Slice numeric tensors.
Status SliceNumeric(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices);
/// Slice numeric tensors.
Status SliceNumeric(TensorPtr *out, const std::vector<dsize_t> &indices);

// Slice string tensors
Status SliceString(std::shared_ptr<Tensor> *out, const std::vector<dsize_t> &indices);
/// Slice string tensors
Status SliceString(TensorPtr *out, const std::vector<dsize_t> &indices);
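/// Illustrative usage (a sketch): slicing a rank-1 tensor t = [10, 20, 30],
/// \code
/// std::shared_ptr<Tensor> out;
/// RETURN_IF_NOT_OK(t->Slice(&out, {0, 2}));  // out is the rank-1 tensor [10, 30]
/// \endcode
/// Negative indices can first be normalized with HandleNeg above.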
#ifdef ENABLE_PYTHON
// Constructs numpy array from input tensor
// @param data this data is the location of python data
// @return Status code
/// Constructs numpy array from input tensor
/// \param[in] data this data is the location of python data
/// \return Status code
Status GetDataAsNumpy(py::array *data);

Status GetDataAsNumpyStrings(py::array *data);

@@ -402,12 +382,12 @@ class Tensor {
static Status GetBufferInfo(Tensor *t, py::buffer_info *out);
#endif
// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor
/// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor
Status Concatenate(const std::vector<dsize_t> &index, const std::shared_ptr<Tensor> &input);

// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor
// The order of the elements follows the memory layout (i.e., row-major): [[1,2,3],[4,5,6]] --> 1,2,3,4,5,6
// @tparam T type of values in the Tensor Iterator
/// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor
/// The order of the elements follows the memory layout (i.e., row-major): [[1,2,3],[4,5,6]] --> 1,2,3,4,5,6
/// \tparam T type of values in the Tensor Iterator
template <typename T, bool = true>
class TensorIterator {
public:
@@ -498,7 +478,7 @@ class Tensor {
};

// Specialization of TensorIterator for strings. It returns std::string_view for every item.
// @tparam DUMMY, used to be able to specialize the inner class
// \tparam DUMMY, used to be able to specialize the inner class
template <bool DUMMY>
class TensorIterator<std::string_view, DUMMY> {
public:
@@ -585,84 +565,192 @@ class Tensor {
const char *data_;
};
// Return a TensorIterator that points to the start of the Tensor.
// It's the user's responsibility to use the correct type that matches the Tensor type
// @param T The type of values in the Tensor
// @return TensorIterator
/// Return a TensorIterator that points to the start of the Tensor.
/// It's the user's responsibility to use the correct type that matches the Tensor type
/// \tparam T The type of values in the Tensor
/// \return TensorIterator
template <typename T>
TensorIterator<T> begin() {
AllocateBuffer(SizeInBytes());
return TensorIterator<T>(data_);
}

// Return a linear iterator that points to the place after the last element of the Tensor.
// @tparam T The type of values in the Tensor
// @return TensorIterator
/// Return a linear iterator that points to the place after the last element of the Tensor.
/// \tparam T The type of values in the Tensor
/// \return TensorIterator
template <typename T>
TensorIterator<T> end() {
return TensorIterator<T>(data_end_);
}
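/// Illustrative usage (a sketch): iterating a DE_FLOAT32 tensor in row-major order,
/// \code
/// float sum = 0.0f;
/// for (auto it = t->begin<float>(); it != t->end<float>(); ++it) sum += *it;
/// \endcode
/// The caller is responsible for choosing the template type that matches the tensor's DataType.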
// Copies the last dimension at `index` from Tensor `src` to this Tensor.
// @param src Tensor
// @param index vector to the start of the dimension. The last dim should be 0
// @return Status
/// Copies the last dimension at `index` from Tensor `src` to this Tensor.
/// \param[in] src Tensor
/// \param[in] index vector to the start of the dimension. The last dim should be 0
/// \return Status
Status CopyLastDimAt(const std::shared_ptr<Tensor> &src, const std::vector<dsize_t> &index);
protected:
// Get the starting memory address for the data of the tensor. This potentially
// drives an allocation if the data is null.
// @return unsigned char*
unsigned char *GetMutableBuffer();
/// Allocate memory for the tensor using the data_allocator
/// \param[in] length number of bytes to be allocated
/// \return Error Status
Status AllocateBuffer(const dsize_t &length);

// A function that prints Tensor recursively, first called by print
// @param out
// @param cur_dim
// @param cur_index
/// Get the starting memory address for the data of the tensor. This potentially
/// drives an allocation if the data is null.
/// \return unsigned char*
unsigned char *GetMutableBuffer() { return data_; }

/// A function that prints Tensor recursively, first called by print
/// \param[in] out
/// \param[in] cur_dim
/// \param[in] cur_index
void PrintRecursive(std::ostream &out, int32_t cur_dim, const std::vector<dsize_t> &cur_index) const;

// A function that prints info about the tensor
// @param out output stream
/// A function that prints info about the tensor
/// \param[out] out output stream
void Print(std::ostream &out) const;

// A function that prints the value as specified by its index
// @param index vector representing the index
// @param out
/// A function that prints the value as specified by its index
/// \param[in] index vector representing the index
/// \param[out] out
void PrintItemAt(const std::vector<dsize_t> &index, std::ostream &out) const;
// Get pointer to item located at `index`, caller needs to provide the type.
// @tparam T
// @param index vector<dsize_t>
// @return return a pointer to the item specified at index of type `T`
/// Get pointer to item located at `index`, caller needs to provide the type.
/// \tparam T
/// \param[in] index vector<dsize_t>
/// \return return a pointer to the item specified at index of type `T`
template <typename T>
Status GetItemPtr(T **, const std::vector<dsize_t> &index) const;

// Get pointer to string located at `index` and the length of string
// @param index vector<dsize_t>
// @return return a pointer to the string specified at index and the length of the string
/// Get pointer to string located at `index` and the length of string
/// \param[in] index vector<dsize_t>
/// \return return a pointer to the string specified at index and the length of the string
Status GetItemPtr(uchar **, const std::vector<dsize_t> &index, offset_t *length = nullptr) const;

// Given a flat index of an item string, return the start and length of the item
// @param index flat index of the item
// @return start address of the string
// @return length of the string
/// Given a flat index of an item string, return the start and length of the item
/// \param[in] index flat index of the item
/// \param[out] start address of the string
/// \param[out] length of the string
Status GetStringAt(dsize_t index, uchar **string_start, offset_t *length) const;

// all access to shape_ should be via shape
/// Skip the offsets and return the start of the buffer where the real strings are stored. The caller needs to check
/// that the tensor's type is a string, otherwise an undefined address would be returned.
/// \return address of the first string of the tensor.
uchar *GetStringsBuffer() const { return data_ + kOffsetSize * shape_.NumOfElements() + kOffsetSize; }
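/// Illustrative arithmetic (a sketch): for a tensor holding 2 strings, GetStringsBuffer() returns
/// data_ + 4 * 2 + 4 = data_ + 12, i.e. the first byte past the n + 1 offset entries, which is
/// exactly where STRING1 of the string layout begins.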
/// all access to shape_ should be via shape
TensorShape shape_;
// data type of tensor
/// data type of tensor
DataType type_;
// pointer to the start of the physical data
/// pointer to the start of the physical data
unsigned char *data_;
// An allocator for data_
/// An allocator for data_
CharAllocPtr data_allocator_;
// pointer to the end of the physical data
/// pointer to the end of the physical data
unsigned char *data_end_ = nullptr;

private:
/// Helper function to create a tensor from a Numpy array of strings
/// \param[in] arr Numpy array
/// \param[out] out Created Tensor
/// \return Status
static Status CreateFromNpString(py::array arr, TensorPtr *out);

/// Copy raw data of an array based on shape and strides to the destination pointer
/// \param[out] dst Pointer to the destination array where the content is to be copied
/// \param[in] src Pointer to the source of strided array to be copied
/// \param[in] shape shape of the source array
/// \param[in] strides strides of the source array
/// \param[in] type_size number of bytes needed to store one array element's type
/// \return Status Code
static Status CopyStridedArray(unsigned char *dst, unsigned char *src, std::vector<dsize_t> shape,
std::vector<dsize_t> strides, uint8_t type_size);

/// const of the size of the offset variable
static constexpr uint8_t kOffsetSize = sizeof(offset_t);
};
template <>
inline Tensor::TensorIterator<std::string_view> Tensor::end<std::string_view>() {
return TensorIterator<std::string_view>(data_, shape_.NumOfElements());
}

/// Create a Tensor from a given list of strings.
/// \note The memory layout of a Tensor of strings consists of the Offset_array followed by the strings.
/// The offset array will store one extra value to find the length of the last string.
/// OFFSET_1, OFFSET_2, ..., OFFSET_n+1, STRING_1, STRING_2, ..., STRING_n
/// The value of each offset is the start index of the corresponding string
/// Offsets are of type offset_t
/// strings will be null-terminated
/// example: Tensor(['abc', 'de'], shape={2}, type=DE_STRING)
/// |----------------------------------------------------------------|
/// |          OFFSET ARRAY          |         STRINGS               |
/// | bytes 0-3 | bytes 4-7 | bytes 8-11 | bytes 12-15 | bytes 16-18 |
/// |    12     |    16     |     19     |    abc\0    |     de\0    |
/// |----------------------------------------------------------------|
/// \param[in] items elements of the tensor
/// \param[in] shape shape of the output tensor
/// \param[out] out output argument to hold the created Tensor
/// \return Status Code
template <>
inline Status Tensor::CreateFromVector<std::string>(const std::vector<std::string> &items, const TensorShape &shape,
TensorPtr *out) {
CHECK_FAIL_RETURN_UNEXPECTED(
items.size() == shape.NumOfElements(),
"Number of elements in the vector does not match the number of elements of the shape required");
const TensorAlloc *alloc = GlobalContext::Instance()->tensor_allocator();
*out = std::allocate_shared<Tensor>(*alloc, TensorShape({static_cast<dsize_t>(items.size())}),
DataType(DataType::DE_STRING));
if (items.size() == 0) {
if (shape.known()) {
return (*out)->Reshape(shape);
}
}
auto length_sum = [](dsize_t sum, const std::string &s) { return s.length() + sum; };
dsize_t total_length = std::accumulate(items.begin(), items.end(), 0, length_sum);

// total bytes needed = offset array + strings
// offset array needs to store one offset var per element + 1 extra to get the length of the last string.
// strings will be null-terminated --> need 1 extra byte per element
dsize_t num_bytes = (kOffsetSize + 1) * (*out)->shape_.NumOfElements() + kOffsetSize + total_length;

(*out)->AllocateBuffer(num_bytes);
auto offset_arr = reinterpret_cast<offset_t *>((*out)->data_);
uchar *buf = (*out)->GetStringsBuffer();

offset_t offset = buf - (*out)->data_;  // the first string will start here
uint32_t i = 0;
for (const auto &str : items) {
// insert the start index of the string.
offset_arr[i++] = offset;
// total bytes are reduced by kOffsetSize
num_bytes -= kOffsetSize;
// insert the actual string
int ret_code = memcpy_s((*out)->data_ + offset, num_bytes, common::SafeCStr(str), str.length() + 1);
if (ret_code != 0) MS_LOG(ERROR) << "Cannot copy string into Tensor";
// the next string will be stored right after the current one.
offset = offset + str.length() + 1;
// total bytes are reduced by the length of the string
num_bytes -= str.length() + 1;
}
// store one more offset value so we can get the length of the last string
// length[last_element] = offset_arr[last_element + 1] - offset_arr[last_element]
offset_arr[i] = offset;

(*out)->data_end_ = (*out)->data_ + offset_arr[i];

MS_ASSERT(num_bytes == 0);
if (shape.known()) {
RETURN_IF_NOT_OK((*out)->Reshape(shape));
}
return Status::OK();
}
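// Illustrative arithmetic for the allocation above (a sketch): for items = {"abc", "de"},
// total_length = 3 + 2 = 5 and num_bytes = (4 + 1) * 2 + 4 + 5 = 19, which breaks down into
// 12 bytes of offsets (n + 1 entries), 5 string bytes, and 2 null terminators.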
/// Create a string scalar Tensor from the given value.
/// \param[in] item value
/// \param[out] out Created tensor
/// \return Status code
template <>
inline Status Tensor::CreateScalar<std::string>(const std::string &item, TensorPtr *out) {
return CreateFromVector<std::string>({item}, TensorShape::CreateScalar(), out);
}
}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_CORE_TENSOR_H_
@@ -97,7 +97,7 @@ Status OneHotEncoding(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *ou
if (input->Rank() == 1) num_elements = input->shape()[0];
TensorShape out_shape({num_elements, num_classes});
std::shared_ptr<Tensor> out;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, TensorImpl::kFlexible, out_shape, input->type()));
RETURN_IF_NOT_OK(Tensor::CreateEmpty(out_shape, input->type(), &out));
RETURN_IF_NOT_OK(out->Zero());
for (dsize_t i = 0; i < num_elements; ++i) {
if (input->type().IsUnsignedInt()) {
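// Illustrative result (a sketch, values not taken from this diff): for input [1, 3] with
// num_classes = 4, the zeroed {2, 4} output becomes [[0, 1, 0, 0], [0, 0, 0, 1]].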
@@ -133,7 +133,9 @@ Status Fill(const std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output
fill_output = fill_value;
}

RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, TensorImpl::kFlexible, input_shape, input_type));
if (input_type.IsNumeric()) {
RETURN_IF_NOT_OK(Tensor::CreateEmpty(input_shape, input_type, &out));
}

switch (input_type.value()) {
case DataType::DE_BOOL: {

@@ -216,7 +218,7 @@ Status Fill(const std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output
for (int i = 0; i < input_shape.NumOfElements(); i++) {
strings.emplace_back(fill_string);
}
RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, strings, input_shape));
RETURN_IF_NOT_OK(Tensor::CreateFromVector(strings, input_shape, &out));
break;
}
case DataType::DE_UNKNOWN: {

@@ -285,9 +287,8 @@ void CastFrom(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out

// Type cast operator
Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output, const DataType &data_type) {
RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), data_type));
RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), data_type, output));

RETURN_IF_NOT_OK((*output)->AllocateBuffer((*output)->SizeInBytes()));
switch (input->type().value()) {
case DataType::DE_BOOL:
CastFrom<bool>(input, output);

@@ -335,8 +336,7 @@ Status TypeCast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o
Status ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
// initiate new tensor for type cast
DataType new_type = DataType("float16");
RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), new_type));
RETURN_IF_NOT_OK((*output)->AllocateBuffer((*output)->SizeInBytes()));
RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), new_type, output));

auto in_itr = input->begin<float>();
auto out_itr = (*output)->begin<float16>();

@@ -387,7 +387,7 @@ Status PadEndNumeric(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor>
(*dst) = src;  // if no padding, copy the pointer
} else {
CHECK_FAIL_RETURN_UNEXPECTED(src->Rank() == pad_shape.size(), "Pad to diff rank not allowed");
RETURN_IF_NOT_OK(Tensor::CreateTensor(dst, TensorImpl::kFlexible, TensorShape(pad_shape), src->type()));
RETURN_IF_NOT_OK(Tensor::CreateEmpty(TensorShape(pad_shape), src->type(), dst));
auto tensor_type = src->type().value();
if (pad_val == 0) {  // if pad with zero, don't care what type it is
RETURN_IF_NOT_OK((*dst)->Zero());

@@ -447,7 +447,7 @@ Status PadEndString(const std::shared_ptr<Tensor> &src, std::shared_ptr<Tensor>
std::vector<dsize_t> cur_ind(src->Rank(), 0);
std::vector<std::string> strings;
RETURN_IF_NOT_OK(PadEndStringHelper(src, &strings, TensorShape(pad_shape), cur_ind, 0, pad_val));
RETURN_IF_NOT_OK(Tensor::CreateTensor(dst, strings, TensorShape(pad_shape)));
RETURN_IF_NOT_OK(Tensor::CreateFromVector(strings, TensorShape(pad_shape), dst));
}
return Status::OK();
}

@@ -521,7 +521,7 @@ Status Mask(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu
"Cannot convert constant value to the type of the input tensor.");
CHECK_FAIL_RETURN_UNEXPECTED(value->shape() == TensorShape::CreateScalar(), "Value is not a scalar");

RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), DataType(DataType::DE_BOOL)));
RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), DataType(DataType::DE_BOOL), output));

std::unique_ptr<TypeCastOp> value_cast_op(new TypeCastOp(input->type()));
std::shared_ptr<Tensor> casted_value;

@@ -629,7 +629,7 @@ Status ConcatenateHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
std::shared_ptr<Tensor> out;

if (input->type().IsNumeric()) {
RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, TensorImpl::kFlexible, t, input->type()));
RETURN_IF_NOT_OK(Tensor::CreateEmpty(t, input->type(), &out));

RETURN_IF_NOT_OK(out->Concatenate({0}, input));
RETURN_IF_NOT_OK(out->Concatenate({input->shape()[0]}, append));

@@ -645,7 +645,7 @@ Status ConcatenateHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
for (; itr != append->end<std::string_view>(); itr++) {
strings.emplace_back(*itr);
}
RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, strings, t));
RETURN_IF_NOT_OK(Tensor::CreateFromVector(strings, t, &out));

*output = out;
}
@@ -26,7 +26,7 @@ Status DuplicateOp::Compute(const TensorRow &input, TensorRow *output) {
IO_CHECK_VECTOR(input, output);
CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor");
std::shared_ptr<Tensor> out;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, input[0]));
RETURN_IF_NOT_OK(Tensor::CreateFromTensor(input[0], &out));
output->push_back(input[0]);
output->push_back(out);
return Status::OK();

@@ -63,9 +63,8 @@ int GetCVBorderType(BorderType type) {
Status Flip(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output, int flip_code) {
std::shared_ptr<CVTensor> input_cv = CVTensor::AsCVTensor(std::move(input));

std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
RETURN_UNEXPECTED_IF_NULL(output_cv);
RETURN_IF_NOT_OK(output_cv->AllocateBuffer(output_cv->SizeInBytes()));
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));

if (input_cv->mat().data) {
try {

@@ -110,8 +109,9 @@ Status Resize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
TensorShape shape{output_height, output_width};
int num_channels = input_cv->shape()[2];
if (input_cv->Rank() == 3) shape = shape.AppendDim(num_channels);
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(shape, input_cv->type());
RETURN_UNEXPECTED_IF_NULL(output_cv);
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &output_cv));

auto cv_mode = GetCVInterpolationMode(mode);
cv::resize(in_image, output_cv->mat(), cv::Size(output_width, output_height), fx, fy, cv_mode);
*output = std::static_pointer_cast<Tensor>(output_cv);

@@ -147,8 +147,8 @@ Status DecodeCv(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o
RETURN_STATUS_UNEXPECTED(err);
}
cv::cvtColor(img_mat, img_mat, static_cast<int>(cv::COLOR_BGR2RGB));
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(img_mat);
RETURN_UNEXPECTED_IF_NULL(output_cv);
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateFromMat(img_mat, &output_cv));
*output = std::static_pointer_cast<Tensor>(output_cv);
return Status::OK();
} catch (const cv::Exception &e) {

@@ -309,7 +309,8 @@ Status JpegCropAndDecode(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
// three number of output components, always convert to RGB and output
constexpr int kOutNumComponents = 3;
TensorShape ts = TensorShape({crop_h, crop_w, kOutNumComponents});
auto output_tensor = std::make_shared<Tensor>(ts, DataType(DataType::DE_UINT8));
std::shared_ptr<Tensor> output_tensor;
RETURN_IF_NOT_OK(Tensor::CreateEmpty(ts, DataType(DataType::DE_UINT8), &output_tensor));
const int buffer_size = output_tensor->SizeInBytes();
JSAMPLE *buffer = reinterpret_cast<JSAMPLE *>(&(*output_tensor->begin<uint8_t>()));
const int max_scanlines_to_read = skipped_scanlines + crop_h;

@@ -331,8 +332,8 @@ Status Rescale(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *ou
RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor");
}
cv::Mat input_image = input_cv->mat();
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(input_cv->shape(), DataType(DataType::DE_FLOAT32));
RETURN_UNEXPECTED_IF_NULL(output_cv);
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), DataType(DataType::DE_FLOAT32), &output_cv));
try {
input_image.convertTo(output_cv->mat(), CV_32F, rescale, shift);
*output = std::static_pointer_cast<Tensor>(output_cv);

@@ -354,8 +355,8 @@ Status Crop(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *outpu
TensorShape shape{h, w};
int num_channels = input_cv->shape()[2];
if (input_cv->Rank() == 3) shape = shape.AppendDim(num_channels);
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(shape, input_cv->type());
RETURN_UNEXPECTED_IF_NULL(output_cv);
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &output_cv));
cv::Rect roi(x, y, w, h);
(input_cv->mat())(roi).copyTo(output_cv->mat());
*output = std::static_pointer_cast<Tensor>(output_cv);
@@ -386,10 +387,11 @@ Status HwcToChw(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *output)
int height = input_cv->shape()[0];
int width = input_cv->shape()[1];

auto output_cv = std::make_unique<CVTensor>(TensorShape{num_channels, height, width}, input_cv->type());
std::shared_ptr<CVTensor> output_cv;
CVTensor::CreateEmpty(TensorShape{num_channels, height, width}, input_cv->type(), &output_cv);
for (int i = 0; i < num_channels; ++i) {
cv::Mat mat;
RETURN_IF_NOT_OK(output_cv->Mat({i}, &mat));
RETURN_IF_NOT_OK(output_cv->MatAtIndex({i}, &mat));
cv::extractChannel(input_cv->mat(), mat, i);
}
*output = std::move(output_cv);

@@ -406,8 +408,9 @@ Status SwapRedAndBlue(std::shared_ptr<Tensor> input, std::shared_ptr<Tensor> *ou
if (input_cv->shape().Size() != 3 || num_channels != 3) {
RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3");
}
auto output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
RETURN_UNEXPECTED_IF_NULL(output_cv);
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));

cv::cvtColor(input_cv->mat(), output_cv->mat(), static_cast<int>(cv::COLOR_BGR2RGB));
*output = std::static_pointer_cast<Tensor>(output_cv);
return Status::OK();

@@ -440,8 +443,8 @@ Status CropAndResize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tenso
TensorShape shape{target_height, target_width};
int num_channels = input_cv->shape()[2];
if (input_cv->Rank() == 3) shape = shape.AppendDim(num_channels);
std::shared_ptr<CVTensor> cvt_out = std::make_shared<CVTensor>(shape, input_cv->type());
RETURN_UNEXPECTED_IF_NULL(cvt_out);
std::shared_ptr<CVTensor> cvt_out;
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(shape, input_cv->type(), &cvt_out));
cv::resize(cv_in(roi), cvt_out->mat(), cv::Size(target_width, target_height), 0, 0, cv_mode);
*output = std::static_pointer_cast<Tensor>(cvt_out);
return Status::OK();

@@ -475,8 +478,7 @@ Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
if (!expand) {
// this case means that the shape doesn't change, size stays the same
// We may not need this memcpy if it is in place.
output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
RETURN_UNEXPECTED_IF_NULL(output_cv);
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
// using inter_nearest to comply with python default
cv::warpAffine(input_img, output_cv->mat(), rot, input_img.size(), GetCVInterpolationMode(interpolation),
cv::BORDER_CONSTANT, fill_color);

@@ -489,7 +491,7 @@ Status Rotate(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *out
// use memcpy and don't compute the new shape since openCV has a rounding problem
cv::warpAffine(input_img, output_img, rot, bbox.size(), GetCVInterpolationMode(interpolation),
cv::BORDER_CONSTANT, fill_color);
output_cv = std::make_shared<CVTensor>(output_img);
RETURN_IF_NOT_OK(CVTensor::CreateFromMat(output_img, &output_cv));
RETURN_UNEXPECTED_IF_NULL(output_cv);
}
*output = std::static_pointer_cast<Tensor>(output_cv);

@@ -506,8 +508,8 @@ Status Normalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *
RETURN_STATUS_UNEXPECTED("Could not convert to CV Tensor");
}
cv::Mat in_image = input_cv->mat();
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(input_cv->shape(), DataType(DataType::DE_FLOAT32));
RETURN_UNEXPECTED_IF_NULL(output_cv);
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), DataType(DataType::DE_FLOAT32), &output_cv));
mean->Squeeze();
if (mean->type() != DataType::DE_FLOAT32 || mean->Rank() != 1 || mean->shape()[0] != 3) {
std::string err_msg = "Mean tensor should be of size 3 and type float.";

@@ -548,8 +550,8 @@ Status AdjustBrightness(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te
if (input_cv->Rank() != 3 || num_channels != 3) {
RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3");
}
auto output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
RETURN_UNEXPECTED_IF_NULL(output_cv);
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
output_cv->mat() = input_img * alpha;
*output = std::static_pointer_cast<Tensor>(output_cv);
} catch (const cv::Exception &e) {

@@ -572,8 +574,8 @@ Status AdjustContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tens
cv::Mat gray, output_img;
cv::cvtColor(input_img, gray, CV_RGB2GRAY);
int mean_img = static_cast<int>(cv::mean(gray).val[0] + 0.5);
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
RETURN_UNEXPECTED_IF_NULL(output_cv);
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
output_img = cv::Mat::zeros(input_img.rows, input_img.cols, CV_8UC1);
output_img = output_img + mean_img;
cv::cvtColor(output_img, output_img, CV_GRAY2RGB);

@@ -680,7 +682,9 @@ Status AutoContrast(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor
cv::Mat result;
cv::merge(image_result, result);
result.convertTo(result, input_cv->mat().type());
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(result);
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, &output_cv));
(*output) = std::static_pointer_cast<Tensor>(output_cv);
(*output)->Reshape(input->shape());
} catch (const cv::Exception &e) {
@@ -700,8 +704,8 @@ Status AdjustSaturation(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te
if (input_cv->Rank() != 3 || num_channels != 3) {
RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3");
}
auto output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
RETURN_UNEXPECTED_IF_NULL(output_cv);
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
cv::Mat output_img = output_cv->mat();
cv::Mat gray;
cv::cvtColor(input_img, gray, CV_RGB2GRAY);

@@ -729,8 +733,8 @@ Status AdjustHue(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *
if (input_cv->Rank() != 3 || num_channels != 3) {
RETURN_STATUS_UNEXPECTED("The shape is incorrect: number of channels does not equal 3");
}
auto output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
RETURN_UNEXPECTED_IF_NULL(output_cv);
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
cv::Mat output_img;
cv::cvtColor(input_img, output_img, CV_RGB2HSV_FULL);
for (int y = 0; y < output_img.cols; y++) {

@@ -781,7 +785,8 @@ Status Equalize(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *o
}
cv::Mat result;
cv::merge(image_result, result);
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(result);
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateFromMat(result, &output_cv));
(*output) = std::static_pointer_cast<Tensor>(output_cv);
(*output)->Reshape(input->shape());
} catch (const cv::Exception &e) {

@@ -867,8 +872,8 @@ Status Pad(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output
} else {
cv::copyMakeBorder(input_cv->mat(), out_image, pad_top, pad_bottom, pad_left, pad_right, b_type);
}
std::shared_ptr<CVTensor> output_cv = std::make_shared<CVTensor>(out_image);
RETURN_UNEXPECTED_IF_NULL(output_cv);
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateFromMat(out_image, &output_cv));
// pad the dimension if shape information is only 2 dimensional, this is grayscale
int num_channels = input_cv->shape()[2];
if (input_cv->Rank() == 3 && num_channels == 1 && output_cv->Rank() == 2) output_cv->ExpandDim(2);

@@ -932,7 +937,7 @@ Status UpdateBBoxesForCrop(std::shared_ptr<Tensor> *bboxList, size_t *bboxCount,
}
}
std::shared_ptr<Tensor> retV;
RETURN_IF_NOT_OK(Tensor::CreateTensor(&retV, copyVals, TensorShape({static_cast<dsize_t>(*bboxCount), bboxDim})));
RETURN_IF_NOT_OK(Tensor::CreateFromVector(copyVals, TensorShape({static_cast<dsize_t>(*bboxCount), bboxDim}), &retV));
(*bboxList) = retV;  // reset pointer
return Status::OK();
}
@@ -40,8 +40,8 @@ Status InvertOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
if (num_channels != 3) {
RETURN_STATUS_UNEXPECTED("The shape is incorrect: num of channels != 3");
}

auto output_cv = std::make_shared<CVTensor>(input_cv->shape(), input_cv->type());
std::shared_ptr<CVTensor> output_cv;
RETURN_IF_NOT_OK(CVTensor::CreateEmpty(input_cv->shape(), input_cv->type(), &output_cv));
RETURN_UNEXPECTED_IF_NULL(output_cv);

output_cv->mat() = cv::Scalar::all(255) - input_img;
@@ -24,20 +24,14 @@
namespace mindspore {
namespace dataset {
NormalizeOp::NormalizeOp(float mean_r, float mean_g, float mean_b, float std_r, float std_g, float std_b) {
int size[] = {3};
cv::Mat mean_cv(1, size, CV_32F);
mean_cv.at<float>(0) = mean_r;
mean_cv.at<float>(1) = mean_g;
mean_cv.at<float>(2) = mean_b;
mean_ = std::make_shared<CVTensor>(mean_cv);
mean_->Squeeze();

cv::Mat std_cv(1, size, CV_32F);
std_cv.at<float>(0) = std_r;
std_cv.at<float>(1) = std_g;
std_cv.at<float>(2) = std_b;
std_ = std::make_shared<CVTensor>(std_cv);
std_->Squeeze();
Status s = Tensor::CreateFromVector<float>({mean_r, mean_g, mean_b}, &mean_);
if (s.IsError()) {
MS_LOG(ERROR) << "Could not create mean tensor.";
}
s = Tensor::CreateFromVector<float>({std_r, std_g, std_b}, &std_);
if (s.IsError()) {
MS_LOG(ERROR) << "Could not create std tensor.";
}
}
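// With the rewrite above, mean_ and std_ are plain 1-D DE_FLOAT32 tensors of shape {3}; e.g.
// NormalizeOp(0.48, 0.45, 0.41, 0.23, 0.22, 0.22) (illustrative values) stores {0.48, 0.45, 0.41}
// in mean_ via the one-argument CreateFromVector overload.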
|
||||
Status NormalizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
|
||||
|
@ -47,9 +41,7 @@ Status NormalizeOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_pt
|
|||
}
|
||||
|
||||
void NormalizeOp::Print(std::ostream &out) const {
|
||||
out << "NormalizeOp, mean: " << mean_->mat().at<float>(0) << ", " << mean_->mat().at<float>(1) << ", "
|
||||
<< mean_->mat().at<float>(2) << "std: " << std_->mat().at<float>(0) << ", " << std_->mat().at<float>(1) << ", "
|
||||
<< std_->mat().at<float>(2) << std::endl;
|
||||
out << "NormalizeOp, mean: " << mean_ << std::endl << "std: " << std_ << std::endl;
|
||||
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
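The constructor now builds mean_ and std_ as plain 1-D tensors via Tensor::CreateFromVector and only logs on failure, since constructors cannot return Status. A minimal sketch of the same pattern, assuming only the factory shown above (the channel values are illustrative):

// sketch: build a 3-channel mean tensor the way the new constructor does
std::shared_ptr<Tensor> mean;
Status s = Tensor::CreateFromVector<float>({0.485f, 0.456f, 0.406f}, &mean);
if (s.IsError()) {
  MS_LOG(ERROR) << "Could not create mean tensor.";  // a constructor cannot propagate the Status
}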
@ -39,8 +39,8 @@ class NormalizeOp : public TensorOp {
  std::string Name() const override { return kNormalizeOp; }

 private:
  std::shared_ptr<CVTensor> mean_;
  std::shared_ptr<CVTensor> std_;
  std::shared_ptr<Tensor> mean_;
  std::shared_ptr<Tensor> std_;
};
}  // namespace dataset
}  // namespace mindspore
@ -49,7 +49,7 @@ Status PyFuncOp::Compute(const TensorRow &input, TensorRow *output) {
    if (py::isinstance<py::array>(ret_py_obj)) {
      // In case of a n-1 mapping, the return value will be a numpy array
      std::shared_ptr<Tensor> out;
      RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, ret_py_obj.cast<py::array>()));
      RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(ret_py_obj.cast<py::array>(), &out));
      output->push_back(out);
    } else if (py::isinstance<py::tuple>(ret_py_obj)) {
      // In case of a n-m mapping, the return value will be a tuple of numpy arrays

@ -61,7 +61,7 @@ Status PyFuncOp::Compute(const TensorRow &input, TensorRow *output) {
          goto ShapeMisMatch;
        }
        std::shared_ptr<Tensor> out;
        RETURN_IF_NOT_OK(Tensor::CreateTensor(&out, ret_py_ele.cast<py::array>()));
        RETURN_IF_NOT_OK(Tensor::CreateFromNpArray(ret_py_ele.cast<py::array>(), &out));
        output->push_back(out);
      }
    } else {
@ -136,8 +136,7 @@ Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::shared_ptr<Tensor
  for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) {
    RETURN_IF_NOT_OK(CaseFoldWithoutUnusedWords(*iter, kUnusedWords, &strs[i++]));
  }
  *output = std::make_shared<Tensor>(std::move(strs), input->shape());
  return Status::OK();
  return Tensor::CreateFromVector(strs, input->shape(), output);
}

Status BasicTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
@ -39,8 +39,7 @@ Status CaseFoldOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr
    nfkc_case_fold->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error);
    CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "normalizeUTF8 failed.");
  }
  *output = std::make_shared<Tensor>(std::move(strs), input->shape());
  return Status::OK();
  return Tensor::CreateFromVector(strs, input->shape(), output);
}
}  // namespace dataset
}  // namespace mindspore
@ -33,12 +33,7 @@ Status SlidingWindowHelper(const std::shared_ptr<Tensor> &input, std::shared_ptr
  // if the data row has fewer items than width, the corresponding result row will be empty
  if (out_shape.Size() == 0) {
    MS_LOG(WARNING) << "The data row has fewer items than width, the result will be empty.";
    if (input->type().value() == DataType::DE_STRING) {
      RETURN_IF_NOT_OK(Tensor::CreateTensor(output, std::vector<std::string>{}, TensorShape({0})));
    } else {
      RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, TensorShape({0}), input->type()));
    }
    return Status::OK();
    return Tensor::CreateEmpty(TensorShape({0}), input->type(), output);
  }

  axis = Tensor::HandleNeg(axis, input->shape().Size());
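This is one of the empty-tensor code paths the commit message refers to: a single Tensor::CreateEmpty call now serves both the string and numeric branches. A minimal sketch of the pattern, assuming the CreateEmpty signature shown above (MakeEmptyLike is an illustrative helper name):

// sketch: produce an empty 1-D tensor of the input's own type
Status MakeEmptyLike(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
  // CreateEmpty accepts DE_STRING as well as numeric types, so no branching is needed
  return Tensor::CreateEmpty(TensorShape({0}), input->type(), output);
}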
@ -68,15 +68,12 @@ Status JiebaTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
      offsets_limit.push_back(static_cast<uint32_t>(item.offset + item.word.length()));
    }
  }
  token_tensor = std::make_shared<Tensor>(words, TensorShape({(dsize_t)words.size()}));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(words, &token_tensor));
  output->push_back(token_tensor);
  if (with_offsets_) {
    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible,
                                          TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32),
                                          reinterpret_cast<unsigned char *>(&offsets_start[0])));
    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible,
                                          TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32),
                                          reinterpret_cast<unsigned char *>(&offsets_limit[0])));
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &offsets_start_tensor));
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &offsets_limit_tensor));

    output->push_back(offsets_start_tensor);
    output->push_back(offsets_limit_tensor);
  }
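Across the tokenizers, the raw-memory CreateTensor calls for offsets collapse into Tensor::CreateFromVector, which deduces DE_UINT32 and the 1-D shape from the vector itself. A minimal sketch of the shared pattern, assuming the shape-free overload shown above (AppendOffsets is an illustrative name):

// sketch: push start/limit offset tensors onto the output row
Status AppendOffsets(const std::vector<uint32_t> &offsets_start, const std::vector<uint32_t> &offsets_limit,
                     TensorRow *output) {
  std::shared_ptr<Tensor> start_t, limit_t;
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &start_t));  // 1-D DE_UINT32, shape deduced
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &limit_t));
  output->push_back(start_t);
  output->push_back(limit_t);
  return Status::OK();
}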
@ -36,9 +36,7 @@ Status LookupOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<T
        word_ids.back() != Vocab::kNoTokenExists,
        "Lookup Error: token: " + std::string(*itr) + " doesn't exist in vocab and no unknown token is specified.");
  }

  RETURN_IF_NOT_OK(Tensor::CreateTensor(output, TensorImpl::kFlexible, input->shape(), type_,
                                        reinterpret_cast<unsigned char *>(word_ids.data())));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(word_ids, input->shape(), output));
  return Status::OK();
}
Status LookupOp::OutputType(const std::vector<DataType> &inputs, std::vector<DataType> &outputs) {
@ -67,7 +67,7 @@ Status NgramOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te
      }
    }
  }
  RETURN_IF_NOT_OK(Tensor::CreateTensor(output, res, TensorShape({static_cast<dsize_t>(res.size())})));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(res, TensorShape({static_cast<dsize_t>(res.size())}), output));
  return Status::OK();
}

@ -68,8 +68,7 @@ Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::share
    normalize->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error);
    CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "normalizeUTF8 failed.");
  }
  *output = std::make_shared<Tensor>(std::move(strs), input->shape());
  return Status::OK();
  return Tensor::CreateFromVector(strs, input->shape(), output);
}
}  // namespace dataset
}  // namespace mindspore
@ -50,8 +50,7 @@ Status RegexReplaceOp::Compute(const std::shared_ptr<Tensor> &input, std::shared
  for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) {
    RETURN_IF_NOT_OK(RegexReplace(&matcher, *iter, &strs[i]));
  }
  *output = std::make_shared<Tensor>(std::move(strs), input->shape());
  return Status::OK();
  return Tensor::CreateFromVector(strs, input->shape(), output);
}
}  // namespace dataset
}  // namespace mindspore
@ -120,15 +120,11 @@ Status RegexTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
  std::shared_ptr<Tensor> token_tensor, offsets_start_tensor, offsets_limit_tensor;
  RETURN_IF_NOT_OK(input[0]->GetItemAt(&text, {}));
  RETURN_IF_NOT_OK(GetRegexTokens(std::string(text.data(), text.size()), &tokens, &offsets_start, &offsets_limit));
  token_tensor = std::make_shared<Tensor>(std::move(tokens), TensorShape({(dsize_t)tokens.size()}));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(std::move(tokens), &token_tensor));
  output->push_back(token_tensor);
  if (with_offsets_) {
    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible,
                                          TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32),
                                          reinterpret_cast<unsigned char *>(&offsets_start[0])));
    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible,
                                          TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32),
                                          reinterpret_cast<unsigned char *>(&offsets_limit[0])));
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &offsets_start_tensor));
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &offsets_limit_tensor));
    output->push_back(offsets_start_tensor);
    output->push_back(offsets_limit_tensor);
  }
@ -64,14 +64,14 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s
    if (!status.ok()) {
      RETURN_STATUS_UNEXPECTED("sentence piece tokenizer error");
    }
    *output = std::make_unique<Tensor>(pieces, TensorShape({(dsize_t)pieces.size()}));
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(pieces, output));
  } else {
    std::vector<int> ids;
    auto status = processor_.Encode(sentence, &ids);
    if (!status.ok()) {
      RETURN_STATUS_UNEXPECTED("sentence piece tokenizer error");
    }
    RETURN_IF_NOT_OK(Tensor::CreateTensor(output, ids, TensorShape({(dsize_t)ids.size()})));
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(ids, output));
  }
  return Status::OK();
}
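Both branches now funnel through the same factory: Encode fills a std::vector (of std::string pieces or int ids) and Tensor::CreateFromVector turns it into a 1-D tensor of the matching DE type. A minimal sketch of the id branch, assuming the signatures shown above (processor_ and sentence as in the surrounding function):

// sketch: encode to ids, then let CreateFromVector pick the dtype and shape
std::vector<int> ids;
auto status = processor_.Encode(sentence, &ids);
if (!status.ok()) {
  RETURN_STATUS_UNEXPECTED("sentence piece tokenizer error");
}
RETURN_IF_NOT_OK(Tensor::CreateFromVector(ids, output));  // 1-D tensor with ids.size() elements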
@ -114,7 +114,7 @@ Status ToNumberOp::ToSignedIntegral(const std::shared_ptr<Tensor> &input, std::s
    casted.push_back(casted_result);
  }

  RETURN_IF_NOT_OK(Tensor::CreateTensor(output, casted, input->shape()));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(casted, input->shape(), output));
  return Status::OK();
}

@ -157,7 +157,7 @@ Status ToNumberOp::ToUnsignedIntegral(const std::shared_ptr<Tensor> &input, std:
    casted.push_back(casted_result);
  }

  RETURN_IF_NOT_OK(Tensor::CreateTensor(output, casted, input->shape()));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(casted, input->shape(), output));
  return Status::OK();
}

@ -165,7 +165,7 @@ Status ToNumberOp::ToFloat16(const std::shared_ptr<Tensor> &input, std::shared_p
  // special case, float16 does not exist in c++, no native support for
  // casting, so cast to float first then use this method, which uses Eigen.
  std::shared_ptr<Tensor> temp;
  RETURN_IF_NOT_OK(Tensor::CreateTensor(&temp, TensorImpl::kFlexible, input->shape(), DataType("float32")));
  RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), DataType("float32"), &temp));
  RETURN_IF_NOT_OK(ToFloat(input, &temp));
  RETURN_IF_NOT_OK(mindspore::dataset::ToFloat16(temp, output));
  return Status::OK();

@ -200,7 +200,7 @@ Status ToNumberOp::ToFloat(const std::shared_ptr<Tensor> &input, std::shared_ptr
    casted.push_back(casted_result);
  }

  RETURN_IF_NOT_OK(Tensor::CreateTensor(output, casted, input->shape()));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(casted, input->shape(), output));
  return Status::OK();
}

@ -233,7 +233,7 @@ Status ToNumberOp::ToDouble(const std::shared_ptr<Tensor> &input, std::shared_pt
    casted.push_back(casted_result);
  }

  RETURN_IF_NOT_OK(Tensor::CreateTensor(output, casted, input->shape()));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(casted, input->shape(), output));
  return Status::OK();
}

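The float16 path is the one place in this file where CreateEmpty (rather than CreateFromVector) is the right tool, because the buffer is filled by a cast rather than copied from a vector. A minimal sketch of the two-step conversion, assuming the ToFloat and ToFloat16 kernels referenced above:

// sketch: string -> float32 -> float16, since C++ has no native half type
std::shared_ptr<Tensor> temp;
RETURN_IF_NOT_OK(Tensor::CreateEmpty(input->shape(), DataType("float32"), &temp));  // allocate only
RETURN_IF_NOT_OK(ToFloat(input, &temp));                        // parse strings into the float32 buffer
RETURN_IF_NOT_OK(mindspore::dataset::ToFloat16(temp, output));  // Eigen-backed narrowing cast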
@ -55,15 +55,13 @@ Status UnicodeCharTokenizerOp::Compute(const TensorRow &input, TensorRow *output
    offsets_start.push_back(0);
    offsets_limit.push_back(0);
  }
  token_tensor = std::make_shared<Tensor>(splits, TensorShape({(dsize_t)splits.size()}));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(splits, &token_tensor));

  output->push_back(token_tensor);
  if (with_offsets_) {
    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible,
                                          TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32),
                                          reinterpret_cast<unsigned char *>(&offsets_start[0])));
    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible,
                                          TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32),
                                          reinterpret_cast<unsigned char *>(&offsets_limit[0])));
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &offsets_start_tensor));
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &offsets_limit_tensor));

    output->push_back(offsets_start_tensor);
    output->push_back(offsets_limit_tensor);
  }
@ -96,15 +96,12 @@ Status UnicodeScriptTokenizerOp::Compute(const TensorRow &input, TensorRow *outp
    offsets_start.push_back(0);
    offsets_limit.push_back(0);
  }
  token_tensor = std::make_shared<Tensor>(splits, TensorShape({(dsize_t)splits.size()}));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(splits, &token_tensor));
  output->push_back(token_tensor);
  if (with_offsets_) {
    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible,
                                          TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32),
                                          reinterpret_cast<unsigned char *>(&offsets_start[0])));
    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible,
                                          TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32),
                                          reinterpret_cast<unsigned char *>(&offsets_limit[0])));
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &offsets_start_tensor));
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &offsets_limit_tensor));

    output->push_back(offsets_start_tensor);
    output->push_back(offsets_limit_tensor);
  }
@ -79,15 +79,12 @@ Status WhitespaceTokenizerOp::Compute(const TensorRow &input, TensorRow *output)
    offsets_start.push_back(0);
    offsets_limit.push_back(0);
  }
  token_tensor = std::make_shared<Tensor>(splits, TensorShape({(dsize_t)splits.size()}));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(splits, &token_tensor));
  output->push_back(token_tensor);
  if (with_offsets_) {
    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible,
                                          TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32),
                                          reinterpret_cast<unsigned char *>(&offsets_start[0])));
    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible,
                                          TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32),
                                          reinterpret_cast<unsigned char *>(&offsets_limit[0])));
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &offsets_start_tensor));
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &offsets_limit_tensor));

    output->push_back(offsets_start_tensor);
    output->push_back(offsets_limit_tensor);
  }
@ -1,157 +1,154 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "minddata/dataset/text/kernels/wordpiece_tokenizer_op.h"
#include <algorithm>
#include <utility>

namespace mindspore {
namespace dataset {

const char WordpieceTokenizerOp::kDefSuffixIndicator[] = "##";
const int WordpieceTokenizerOp::kDefMaxBytesPerToken = 100;
const char WordpieceTokenizerOp::kDefUnknownToken[] = "[UNK]";
const bool WordpieceTokenizerOp::kDefWithOffsets = false;

WordpieceTokenizerOp::WordpieceTokenizerOp(const std::shared_ptr<Vocab> &vocab, const std::string &suffix_indicator,
                                           const int &max_bytes_per_token, const std::string &unknown_token,
                                           const bool &with_offsets)
    : vocab_(vocab),
      suffix_indicator_(suffix_indicator),
      max_bytes_per_token_(max_bytes_per_token),
      unknown_token_(unknown_token),
      with_offsets_(with_offsets) {}

Status WordpieceTokenizerOp::LookupWord(const std::string &input_token, const RuneStrArray &runes, const int start,
                                        bool *out_found, int *out_end) const {
  CHECK_FAIL_RETURN_UNEXPECTED(start >= 0 && start < input_token.size(), "Out of range");
  *out_found = false;
  for (int i = runes.size() - 1; i >= 0; i--) {
    *out_end = runes[i].offset + runes[i].len;
    int len = *out_end - start;
    std::string word = input_token.substr(start, len);
    if (start > 0) {
      word = suffix_indicator_ + word;
    }
    if (vocab_->Lookup(word) != Vocab::kNoTokenExists) {
      *out_found = true;
      break;
    }
  }
  return Status::OK();
}

Status WordpieceTokenizerOp::FoundNoToken(const std::string &input_token, const uint32_t &basic_start,
                                          std::vector<std::string> *out_tokens, std::vector<uint32_t> *offsets_start,
                                          std::vector<uint32_t> *offsets_limit) const {
  out_tokens->clear();
  offsets_start->push_back(basic_start);
  if (unknown_token_.empty()) {
    out_tokens->emplace_back(input_token);
    offsets_limit->push_back(basic_start + input_token.length());
  } else {
    out_tokens->emplace_back(unknown_token_);
    offsets_limit->push_back(basic_start + input_token.length());
  }
  return Status::OK();
}

Status WordpieceTokenizerOp::AddSubword(const std::string &input_token, const int &start, const int &end,
                                        std::vector<std::string> *out_tokens) const {
  CHECK_FAIL_RETURN_UNEXPECTED(start >= 0 && end > start && end <= input_token.size(), "Out of range");
  std::string subword = input_token.substr(start, end - start);
  if (start > 0) {
    subword = suffix_indicator_ + subword;
  }
  out_tokens->emplace_back(subword);
  return Status::OK();
}

Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, const uint32_t &basic_start,
                                       std::vector<std::string> *out_tokens, std::vector<uint32_t> *offsets_start,
                                       std::vector<uint32_t> *offsets_limit) const {
  if (input_token.size() > max_bytes_per_token_) {
    offsets_start->push_back(basic_start);
    if (!unknown_token_.empty()) {
      offsets_limit->push_back(basic_start + unknown_token_.size());
      out_tokens->emplace_back(unknown_token_);
    } else {
      out_tokens->emplace_back(input_token);
      offsets_limit->push_back(basic_start + input_token.size());
    }
    return Status::OK();
  }
  RuneStrArray runes;
  if (!DecodeRunesInString(input_token.data(), input_token.size(), runes)) {
    RETURN_STATUS_UNEXPECTED("Decode utf8 string failed.");
  }
  int end = 0;
  for (int start = 0; start < input_token.size();) {
    bool found = false;
    RETURN_IF_NOT_OK(LookupWord(input_token, runes, start, &found, &end));
    if (found) {
      RETURN_IF_NOT_OK(AddSubword(input_token, start, end, out_tokens));
      offsets_start->push_back(static_cast<uint32_t>(basic_start + start));
      offsets_limit->push_back(static_cast<uint32_t>(basic_start + end));
      start = end;
    } else {
      return FoundNoToken(input_token, basic_start, out_tokens, offsets_start, offsets_limit);
    }
  }
  return Status::OK();
}

Status WordpieceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
  IO_CHECK_VECTOR(input, output);
  if (input[0]->Rank() > 1 || input[0]->type() != DataType::DE_STRING) {
    RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor");
  }
  dsize_t count = 0;
  std::vector<std::string> out_tokens;
  std::vector<uint32_t> offsets_start, offsets_limit;
  std::shared_ptr<Tensor> token_tensor, offsets_start_tensor, offsets_limit_tensor;
  for (auto iter = input[0]->begin<std::string_view>(); iter != input[0]->end<std::string_view>(); iter++) {
    uint32_t basic_start = 0;
    std::vector<std::string> temp_tokens;
    if (with_offsets_ && input.size() == 3) {
      RETURN_IF_NOT_OK(input[1]->GetItemAt<uint32_t>(&basic_start, {count, 0}));
    }
    RETURN_IF_NOT_OK(GetTokens(std::string(*iter), basic_start, &temp_tokens, &offsets_start, &offsets_limit));
    out_tokens.insert(out_tokens.end(), temp_tokens.begin(), temp_tokens.end());
    count++;
  }
  if (out_tokens.empty()) {
    out_tokens.emplace_back("");
    offsets_start.push_back(0);
    offsets_limit.push_back(0);
  }
  token_tensor = std::make_shared<Tensor>(out_tokens, TensorShape({(dsize_t)out_tokens.size()}));
  output->push_back(token_tensor);
  if (with_offsets_) {
    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_start_tensor, TensorImpl::kFlexible,
                                          TensorShape({(dsize_t)offsets_start.size()}), DataType(DataType::DE_UINT32),
                                          reinterpret_cast<unsigned char *>(&offsets_start[0])));
    RETURN_IF_NOT_OK(Tensor::CreateTensor(&offsets_limit_tensor, TensorImpl::kFlexible,
                                          TensorShape({(dsize_t)offsets_limit.size()}), DataType(DataType::DE_UINT32),
                                          reinterpret_cast<unsigned char *>(&offsets_limit[0])));
  RETURN_IF_NOT_OK(Tensor::CreateFromVector(out_tokens, &token_tensor));
  output->push_back(token_tensor);
  if (with_offsets_) {
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_start, &offsets_start_tensor));
    RETURN_IF_NOT_OK(Tensor::CreateFromVector(offsets_limit, &offsets_limit_tensor));

    output->push_back(offsets_start_tensor);
    output->push_back(offsets_limit_tensor);
  }
  return Status::OK();
}

}  // namespace dataset
}  // namespace mindspore
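GetTokens above implements greedy longest-match-first wordpiece: try the longest candidate ending on a rune boundary, shrink until the vocab hits, then continue from the match end. A standalone sketch of the same idea over ASCII input, using std::set as a stand-in vocab (all names here are illustrative, not part of the MindSpore API):

#include <set>
#include <string>
#include <vector>

// Greedy longest-match wordpiece over byte offsets; assumes 1-byte runes for brevity.
bool WordpieceSketch(const std::string &token, const std::set<std::string> &vocab,
                     std::vector<std::string> *out) {
  size_t start = 0;
  while (start < token.size()) {
    size_t end = token.size();
    bool found = false;
    for (; end > start; end--) {  // shrink the candidate until the vocab contains it
      std::string word = token.substr(start, end - start);
      if (start > 0) word = "##" + word;  // continuation pieces carry the suffix indicator
      if (vocab.count(word) != 0) {
        out->push_back(word);
        found = true;
        break;
      }
    }
    if (!found) return false;  // caller falls back to [UNK], as FoundNoToken does
    start = end;
  }
  return true;
}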
@ -90,8 +90,8 @@ TEST_F(MindDataTestBatchOp, TestSimpleBatch) {
  rc = di.GetNextAsMap(&tensor_map);
  EXPECT_TRUE(rc.IsOk());
  std::shared_ptr<de::Tensor> t;
  rc = de::Tensor::CreateTensor(&t, TensorImpl::kFlexible, de::TensorShape({12, 1}), de::DataType(DataType::DE_INT64),
                                (unsigned char *)payload);
  rc = de::Tensor::CreateFromMemory(de::TensorShape({12, 1}), de::DataType(DataType::DE_INT64),
                                    (unsigned char *)payload, &t);
  EXPECT_TRUE(rc.IsOk());
  // verify the actual data in Tensor is correct
  EXPECT_EQ(*t == *tensor_map["col_sint64"], true);

@ -119,14 +119,14 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropTrue) {
                     -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
  de::DatasetIterator di(tree);
  std::shared_ptr<de::Tensor> t1, t2, t3;
  rc = de::Tensor::CreateTensor(&t1, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                (unsigned char *)payload);
  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                    (unsigned char *)payload, &t1);
  EXPECT_TRUE(rc.IsOk());
  rc = de::Tensor::CreateTensor(&t2, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                (unsigned char *)(payload + 7));
  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                    (unsigned char *)(payload + 7), &t2);
  EXPECT_TRUE(rc.IsOk());
  rc = de::Tensor::CreateTensor(&t3, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                (unsigned char *)(payload + 2));
  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                    (unsigned char *)(payload + 2), &t3);
  EXPECT_TRUE(rc.IsOk());

  TensorMap tensor_map;

@ -164,17 +164,17 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropFalse) {
                     -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
  de::DatasetIterator di(tree);
  std::shared_ptr<de::Tensor> t1, t2, t3, t4;
  rc = de::Tensor::CreateTensor(&t1, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                (unsigned char *)payload);
  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                    (unsigned char *)payload, &t1);
  EXPECT_TRUE(rc.IsOk());
  rc = de::Tensor::CreateTensor(&t2, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                (unsigned char *)(payload + 7));
  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                    (unsigned char *)(payload + 7), &t2);
  EXPECT_TRUE(rc.IsOk());
  rc = de::Tensor::CreateTensor(&t3, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                (unsigned char *)(payload + 2));
  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                    (unsigned char *)(payload + 2), &t3);
  EXPECT_TRUE(rc.IsOk());
  rc = de::Tensor::CreateTensor(&t4, TensorImpl::kFlexible, de::TensorShape({3, 1}), de::DataType(DataType::DE_INT64),
                                (unsigned char *)(payload + 9));
  rc = de::Tensor::CreateFromMemory(de::TensorShape({3, 1}), de::DataType(DataType::DE_INT64),
                                    (unsigned char *)(payload + 9), &t4);
  EXPECT_TRUE(rc.IsOk());

  TensorMap tensor_map;

@ -216,11 +216,11 @@ TEST_F(MindDataTestBatchOp, TestBatchDropFalseRepeat) {
                     -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
  de::DatasetIterator di(tree);
  std::shared_ptr<de::Tensor> t1, t2;
  rc = de::Tensor::CreateTensor(&t1, TensorImpl::kFlexible, de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                (unsigned char *)payload);
  rc = de::Tensor::CreateFromMemory(de::TensorShape({7, 1}), de::DataType(DataType::DE_INT64),
                                    (unsigned char *)payload, &t1);
  EXPECT_TRUE(rc.IsOk());
  rc = de::Tensor::CreateTensor(&t2, TensorImpl::kFlexible, de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
                                (unsigned char *)(payload + 7));
  rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
                                    (unsigned char *)(payload + 7), &t2);
  EXPECT_TRUE(rc.IsOk());

  TensorMap tensor_map;

@ -262,11 +262,11 @@ TEST_F(MindDataTestBatchOp, TestBatchDropTrueRepeat) {
                     -9223372036854775807 - 1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 9223372036854775807};
  de::DatasetIterator di(tree);
  std::shared_ptr<de::Tensor> t1, t2;
  rc = de::Tensor::CreateTensor(&t1, TensorImpl::kFlexible, de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
                                (unsigned char *)payload);
  rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
                                    (unsigned char *)payload, &t1);
  EXPECT_TRUE(rc.IsOk());
  rc = de::Tensor::CreateTensor(&t2, TensorImpl::kFlexible, de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
                                (unsigned char *)(payload + 5));
  rc = de::Tensor::CreateFromMemory(de::TensorShape({5, 1}), de::DataType(DataType::DE_INT64),
                                    (unsigned char *)(payload + 5), &t2);
  EXPECT_TRUE(rc.IsOk());

  TensorMap tensor_map;

@ -300,7 +300,7 @@ TEST_F(MindDataTestBatchOp, TestSimpleBatchPadding) {
  std::shared_ptr<BatchOp> op;
  PadInfo m;
  std::shared_ptr<Tensor> pad_value;
  Tensor::CreateTensor(&pad_value, TensorImpl::kFlexible, TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32));
  Tensor::CreateEmpty(TensorShape::CreateScalar(), DataType(DataType::DE_FLOAT32), &pad_value);
  pad_value->SetItemAt<float>({}, -1);
  m.insert({"col_1d", std::make_pair(TensorShape({4}), pad_value)});
  de::BatchOp::Builder(12).SetDrop(false).SetPaddingMap(m, true).Build(&op);

@ -359,8 +359,8 @@ TEST_F(MindDataTestBatchOp, TestSimpleBatchPadding) {
                     -1,
                     -1};
  std::shared_ptr<de::Tensor> t;
  rc = de::Tensor::CreateTensor(&t, TensorImpl::kFlexible, de::TensorShape({12, 4}), de::DataType(DataType::DE_INT64),
                                (unsigned char *)payload);
  rc = de::Tensor::CreateFromMemory(de::TensorShape({12, 4}), de::DataType(DataType::DE_INT64),
                                    (unsigned char *)payload, &t);
  de::DatasetIterator di(tree);
  TensorMap tensor_map;
  rc = di.GetNextAsMap(&tensor_map);
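The tests now build expected tensors with CreateFromMemory, which takes the shape and type first and the destination pointer last, mirroring the other factories. A minimal sketch, assuming the signature shown above (payload is an illustrative local array; de is the test alias for mindspore::dataset):

// sketch: wrap an existing int64 buffer in a Tensor and verify the call succeeded
int64_t payload[12] = {0};
std::shared_ptr<de::Tensor> t;
Status rc = de::Tensor::CreateFromMemory(de::TensorShape({12, 1}), de::DataType(DataType::DE_INT64),
                                         reinterpret_cast<unsigned char *>(payload), &t);
EXPECT_TRUE(rc.IsOk());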
@ -75,7 +75,8 @@ TEST_F(MindDataTestCacheOp, TestCacheServer) {
  EXPECT_TRUE(rc.IsOk());

  // Create a tensor, take a snapshot and restore it back, and compare.
  std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 3}), DataType(DataType::DE_UINT64));
  std::shared_ptr<Tensor> t;
  Tensor::CreateEmpty(TensorShape({2, 3}), DataType(DataType::DE_UINT64), &t);
  t->SetItemAt<uint64_t>({0, 0}, 1);
  t->SetItemAt<uint64_t>({0, 1}, 2);
  t->SetItemAt<uint64_t>({0, 2}, 3);

@ -129,7 +130,8 @@ TEST_F(MindDataTestCacheOp, TestConcurrencyRequest) {
  rc = myClient.CreateCache(1, true);
  EXPECT_TRUE(rc.IsOk());
  std::cout << myClient << std::endl;
  std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 3}), DataType(DataType::DE_UINT64));
  std::shared_ptr<Tensor> t;
  Tensor::CreateEmpty(TensorShape({2, 3}), DataType(DataType::DE_UINT64), &t);
  t->SetItemAt<uint64_t>({0, 0}, 1);
  t->SetItemAt<uint64_t>({0, 1}, 2);
  t->SetItemAt<uint64_t>({0, 2}, 3);

@ -403,11 +405,7 @@ TEST_F(MindDataTestCacheOp, TestImageFolderCacheMerge) {
  // replace it with the required tree structures for cache lookup op and cache merge op.

  std::shared_ptr<CacheOp> myCacheOp;
  rc = CacheOp::Builder()
         .SetNumWorkers(4)
         .SetClient(myClient)
         .SetRowsPerBuffer(3)
         .Build(&myCacheOp);
  rc = CacheOp::Builder().SetNumWorkers(4).SetClient(myClient).SetRowsPerBuffer(3).Build(&myCacheOp);

  std::shared_ptr<ImageFolderOp> so;
  ImageFolderOp::Builder builder;
@ -36,7 +36,7 @@ TEST_F(MindDataTestChannelSwap, TestOp) {
  int size_buffer = s[0] * s[1] * s[2];

  std::unique_ptr<uchar[]> output_buffer(new uchar[size_buffer]);
  std::shared_ptr<Tensor> output_tensor(new Tensor(s, DataType(DataType::DE_UINT8)));
  std::shared_ptr<Tensor> output_tensor;

  // Decoding
  std::unique_ptr<HwcToChwOp> op(new HwcToChwOp());
@ -163,8 +163,11 @@ void BBoxOpCommon::CompareActualAndExpected(const std::string &op_name) {
  // after comparison is done remove temporary file
  EXPECT_TRUE(remove(actual_path.c_str()) == 0);
  // compare using ==operator by Tensor
  std::shared_ptr<CVTensor> expect_img_t, actual_img_t;
  CVTensor::CreateFromMat(expect_img, &expect_img_t);
  CVTensor::CreateFromMat(actual_img, &actual_img_t);
  if (actual_img.data) {
    EXPECT_EQ(CVTensor(expect_img) == CVTensor(actual_img), true);
    EXPECT_EQ(*expect_img_t == *actual_img_t, true);
  } else {
    MS_LOG(ERROR) << "Not pass verification! Image data is null.";
    EXPECT_EQ(0, 1);

@ -223,7 +226,7 @@ bool BBoxOpCommon::LoadAnnotationFile(const std::string &path, std::shared_ptr<T
    object = object->NextSiblingElement("object");  // Read next BBox if exists
  }
  std::shared_ptr<Tensor> ret_value;
  Status s = Tensor::CreateTensor(&ret_value, return_value_list, TensorShape({bbox_count, bbox_val_count}));
  Status s = Tensor::CreateFromVector(return_value_list, TensorShape({bbox_count, bbox_val_count}), &ret_value);
  EXPECT_TRUE(s.IsOk());
  (*target_BBox) = ret_value;  // load bbox from file into return
  return true;
@ -52,9 +52,11 @@ std::string CVOpCommon::GetFilename() {

void CVOpCommon::GetInputImage(std::string filename) {
  try {
    Tensor::CreateTensor(&raw_input_tensor_, filename);
    Tensor::CreateFromFile(filename, &raw_input_tensor_);
    raw_cv_image_ = cv::imread(filename, cv::ImreadModes::IMREAD_COLOR);
    input_tensor_ = std::dynamic_pointer_cast<Tensor>(std::make_shared<CVTensor>(raw_cv_image_));
    std::shared_ptr<CVTensor> input_cv_tensor;
    CVTensor::CreateFromMat(raw_cv_image_, &input_cv_tensor);
    input_tensor_ = std::dynamic_pointer_cast<Tensor>(input_cv_tensor);
    SwapRedAndBlue(input_tensor_, &input_tensor_);
    if (raw_cv_image_.data) {
      MS_LOG(INFO) << "Reading was successful. Height:" << raw_cv_image_.rows << " Width: " << raw_cv_image_.cols
@ -29,14 +29,14 @@ class MindDataTestConcatenateOp : public UT::Common {

TEST_F(MindDataTestConcatenateOp, TestOp) {
  MS_LOG(INFO) << "Doing MindDataTestConcatenate-TestOp.";
  uint64_t labels[3] = {1, 1, 2};
  std::vector<uint64_t> labels = {1, 1, 2};
  TensorShape shape({3});
  std::shared_ptr<Tensor> input =
    std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(labels));
  std::shared_ptr<Tensor> input;
  Tensor::CreateFromVector(labels, &input);

  uint64_t append_labels[3] = {4, 4, 4};
  std::shared_ptr<Tensor> append =
    std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(append_labels));
  std::vector<uint64_t> append_labels = {4, 4, 4};
  std::shared_ptr<Tensor> append;
  Tensor::CreateFromVector(append_labels, &append);

  std::shared_ptr<Tensor> output;
  std::unique_ptr<ConcatenateOp> op(new ConcatenateOp(0, nullptr, append));

@ -44,10 +44,11 @@ TEST_F(MindDataTestConcatenateOp, TestOp) {
  in.push_back(input);
  TensorRow out_row;
  Status s = op->Compute(in, &out_row);
  uint64_t out[6] = {1, 1, 2, 4, 4, 4};
  std::shared_ptr<Tensor> expected =
    std::make_shared<Tensor>(TensorShape{6}, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(out));
  std::vector<uint64_t> out = {1, 1, 2, 4, 4, 4};

  std::shared_ptr<Tensor> expected;
  Tensor::CreateFromVector(out, &expected);

  output = out_row[0];
  EXPECT_TRUE(s.IsOk());
  ASSERT_TRUE(output->shape() == expected->shape());
@ -32,9 +32,9 @@ class MindDataTestDuplicateOp : public UT::Common {

TEST_F(MindDataTestDuplicateOp, Basics) {
  std::shared_ptr<Tensor> t;
  Tensor::CreateTensor(&t, std::vector<uint32_t>({1, 2, 3, 4, 5, 6}));
  Tensor::CreateFromVector(std::vector<uint32_t>({1, 2, 3, 4, 5, 6}), &t);
  std::shared_ptr<Tensor> v;
  Tensor::CreateTensor(&v, std::vector<uint32_t>({3}), TensorShape::CreateScalar());
  Tensor::CreateFromVector(std::vector<uint32_t>({3}), TensorShape::CreateScalar(), &v);
  std::shared_ptr<DuplicateOp> op = std::make_shared<DuplicateOp>();
  TensorRow in;
  in.push_back(t);
@ -29,23 +29,20 @@ class MindDataTestFillOp : public UT::Common {

TEST_F(MindDataTestFillOp, TestOp) {
  MS_LOG(INFO) << "Doing MindDataTestFillOp-TestOp.";
  uint64_t labels[3] = {1, 1, 2};
  TensorShape shape({3});
  std::shared_ptr<Tensor> input =
    std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(labels));
  std::vector<uint64_t> labels = {1, 1, 2};
  std::shared_ptr<Tensor> input;
  Tensor::CreateFromVector(labels, &input);

  TensorShape fill_shape({});
  std::shared_ptr<Tensor> fill_tensor = std::make_shared<Tensor>(fill_shape, DataType(DataType::DE_UINT64));
  fill_tensor->SetItemAt<uint64_t>({}, 4);
  std::shared_ptr<Tensor> fill_tensor;
  Tensor::CreateScalar<uint64_t>(4, &fill_tensor);

  std::shared_ptr<Tensor> output;
  std::unique_ptr<FillOp> op(new FillOp(fill_tensor));
  Status s = op->Compute(input, &output);

  uint64_t out[3] = {4, 4, 4};

  std::shared_ptr<Tensor> expected =
    std::make_shared<Tensor>(TensorShape{3}, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(out));
  std::vector<uint64_t> out = {4, 4, 4};
  std::shared_ptr<Tensor> expected;
  Tensor::CreateFromVector(out, &expected);

  EXPECT_TRUE(s.IsOk());
  ASSERT_TRUE(output->shape() == expected->shape());

@ -59,23 +56,20 @@ TEST_F(MindDataTestFillOp, TestOp) {

TEST_F(MindDataTestFillOp, TestCasting) {
  MS_LOG(INFO) << "Doing MindDataTestFillOp-TestCasting.";
  uint64_t labels[3] = {0, 1, 2};
  TensorShape shape({3});
  std::shared_ptr<Tensor> input =
    std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(labels));
  std::vector<uint64_t> labels = {0, 1, 2};
  std::shared_ptr<Tensor> input;
  Tensor::CreateFromVector(labels, &input);

  TensorShape fill_shape({});
  std::shared_ptr<Tensor> fill_tensor = std::make_shared<Tensor>(fill_shape, DataType(DataType::DE_FLOAT32));
  fill_tensor->SetItemAt<float>({}, 2.0);
  std::shared_ptr<Tensor> fill_tensor;
  Tensor::CreateScalar<float>(2.0, &fill_tensor);

  std::shared_ptr<Tensor> output;
  std::unique_ptr<FillOp> op(new FillOp(fill_tensor));
  Status s = op->Compute(input, &output);

  uint64_t out[3] = {2, 2, 2};

  std::shared_ptr<Tensor> expected =
    std::make_shared<Tensor>(TensorShape{3}, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(out));
  std::vector<uint64_t> out = {2, 2, 2};
  std::shared_ptr<Tensor> expected;
  Tensor::CreateFromVector(out, &expected);

  ASSERT_TRUE(output->shape() == expected->shape());
  ASSERT_TRUE(output->type() == expected->type());

@ -90,15 +84,15 @@ TEST_F(MindDataTestFillOp, TestCasting) {

TEST_F(MindDataTestFillOp, ScalarFill) {
  MS_LOG(INFO) << "Doing MindDataTestFillOp-ScalarFill.";
  uint64_t labels[3] = {0, 1, 2};
  TensorShape shape({3});
  std::shared_ptr<Tensor> input =
    std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(labels));
  std::vector<uint64_t> labels = {0, 1, 2};
  std::shared_ptr<Tensor> input;
  Tensor::CreateFromVector(labels, &input);

  TensorShape fill_shape({2});
  uint64_t fill_labels[3] = {0, 1};
  std::shared_ptr<Tensor> fill_tensor =
    std::make_shared<Tensor>(fill_shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(fill_labels));
  std::vector<uint64_t> fill_labels = {0, 1};
  std::shared_ptr<Tensor> fill_tensor;
  Tensor::CreateFromVector(fill_labels, &fill_tensor);

  std::shared_ptr<Tensor> output;
  std::unique_ptr<FillOp> op(new FillOp(fill_tensor));
  Status s = op->Compute(input, &output);

@ -112,12 +106,11 @@ TEST_F(MindDataTestFillOp, ScalarFill) {
TEST_F(MindDataTestFillOp, StringFill) {
  MS_LOG(INFO) << "Doing MindDataTestFillOp-StringFill.";
  std::vector<std::string> strings = {"xyzzy", "plugh", "abracadabra"};
  TensorShape shape({3});
  std::shared_ptr<Tensor> input = std::make_shared<Tensor>(strings, shape);
  std::shared_ptr<Tensor> input;
  Tensor::CreateFromVector(strings, &input);

  TensorShape fill_shape({});
  std::string fill_string = "hello";
  std::shared_ptr<Tensor> fill_tensor = std::make_shared<Tensor>(fill_string);
  std::shared_ptr<Tensor> fill_tensor;
  Tensor::CreateScalar<std::string>("hello", &fill_tensor);

  std::shared_ptr<Tensor> output;

@ -125,8 +118,8 @@ TEST_F(MindDataTestFillOp, StringFill) {
  Status s = op->Compute(input, &output);

  std::vector<std::string> expected_strings = {"hello", "hello", "hello"};
  TensorShape expected_shape({3});
  std::shared_ptr<Tensor> expected = std::make_shared<Tensor>(expected_strings, expected_shape);
  std::shared_ptr<Tensor> expected;
  Tensor::CreateFromVector(expected_strings, &expected);

  EXPECT_TRUE(s.IsOk());
  ASSERT_TRUE(output->shape() == expected->shape());

@ -142,12 +135,11 @@ TEST_F(MindDataTestFillOp, StringFill) {
TEST_F(MindDataTestFillOp, NumericToString) {
  MS_LOG(INFO) << "Doing MindDataTestFillOp-NumericToString.";
  std::vector<std::string> strings = {"xyzzy", "plugh", "abracadabra"};
  TensorShape shape({3});
  std::shared_ptr<Tensor> input = std::make_shared<Tensor>(strings, shape);
  std::shared_ptr<Tensor> input;
  Tensor::CreateFromVector(strings, &input);

  TensorShape fill_shape({});
  std::shared_ptr<Tensor> fill_tensor = std::make_shared<Tensor>(fill_shape, DataType(DataType::DE_FLOAT32));
  fill_tensor->SetItemAt<float>({}, 2.0);
  std::shared_ptr<Tensor> fill_tensor;
  Tensor::CreateScalar<float>(2.0, &fill_tensor);

  std::shared_ptr<Tensor> output;

@ -162,14 +154,12 @@ TEST_F(MindDataTestFillOp, NumericToString) {

TEST_F(MindDataTestFillOp, StringToNumeric) {
  MS_LOG(INFO) << "Doing MindDataTestFillOp-StringToNumeric.";
  uint64_t labels[3] = {0, 1, 2};
  TensorShape shape({3});
  std::shared_ptr<Tensor> input =
    std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64), reinterpret_cast<unsigned char *>(labels));
  std::vector<uint64_t> labels = {0, 1, 2};
  std::shared_ptr<Tensor> input;
  Tensor::CreateFromVector(labels, &input);

  TensorShape fill_shape({});
  std::string fill_string = "hello";
  std::shared_ptr<Tensor> fill_tensor = std::make_shared<Tensor>(fill_string);
  std::shared_ptr<Tensor> fill_tensor;
  Tensor::CreateScalar<std::string>("hello", &fill_tensor);

  std::shared_ptr<Tensor> output;

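CreateScalar collapses the old three-step pattern (construct a scalar-shaped tensor, then SetItemAt) into one call, for numeric and string payloads alike. A minimal sketch, assuming the template signature used above:

// sketch: scalar fill values, numeric and string, via one factory call each
std::shared_ptr<Tensor> num_fill, str_fill;
Tensor::CreateScalar<float>(2.0, &num_fill);            // shape {}, DE_FLOAT32
Tensor::CreateScalar<std::string>("hello", &str_fill);  // shape {}, DE_STRING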
@ -68,8 +68,7 @@ std::shared_ptr<ImageFolderOp> ImageFolder(int64_t num_works, int64_t rows, int6
Status Create1DTensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements, unsigned char *data = nullptr,
                      DataType::Type data_type = DataType::DE_UINT32) {
  TensorShape shape(std::vector<int64_t>(1, num_elements));
  RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, TensorImpl::kFlexible, shape, DataType(data_type), data));
  (*sample_ids)->AllocateBuffer((*sample_ids)->SizeInBytes());  // allocate memory in case user forgets!
  RETURN_IF_NOT_OK(Tensor::CreateFromMemory(shape, DataType(data_type), data, sample_ids));

  return Status::OK();
}
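Note the AllocateBuffer safety net disappears along with the old factory; the sketch below shows the helper's two call shapes, on the assumption (implied by this hunk) that CreateFromMemory allocates the buffer itself when data is null:

// sketch: both uses of the Create1DTensor test helper above
std::shared_ptr<Tensor> ids;
Create1DTensor(&ids, 5);  // data == nullptr: the tensor owns a fresh buffer
uint32_t raw[5] = {0, 1, 2, 3, 4};
Create1DTensor(&ids, 5, reinterpret_cast<unsigned char *>(raw));  // wrap caller-provided memory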
@ -42,7 +42,8 @@ TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opFuntions) {
  TensorRow input, output;
  std::unique_ptr<JiebaTokenizerOp> op(new JiebaTokenizerOp(hmm_path, mp_path));

  std::shared_ptr<Tensor> input_tensor = std::make_shared<Tensor>("今天天气太好了我们一起去外面玩吧");
  std::shared_ptr<Tensor> input_tensor;
  Tensor::CreateScalar<std::string>("今天天气太好了我们一起去外面玩吧", &input_tensor);
  input.push_back(input_tensor);
  Status s = op->Compute(input, &output);
  EXPECT_TRUE(s.IsOk());

@ -66,7 +67,8 @@ TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opAdd) {
  std::unique_ptr<JiebaTokenizerOp> op(new JiebaTokenizerOp(hmm_path, mp_path));

  op->AddWord("男默女泪");
  std::shared_ptr<Tensor> input_tensor = std::make_shared<Tensor>("男默女泪");
  std::shared_ptr<Tensor> input_tensor;
  Tensor::CreateScalar<std::string>("男默女泪", &input_tensor);
  input.push_back(input_tensor);
  Status s = op->Compute(input, &output);
  EXPECT_TRUE(s.IsOk());

@ -84,7 +86,8 @@ TEST_F(MindDataTestJiebaTokenizerOp, TestJieba_opEmpty) {
  std::unique_ptr<JiebaTokenizerOp> op(new JiebaTokenizerOp(hmm_path, mp_path));

  op->AddWord("男默女泪");
  std::shared_ptr<Tensor> input_tensor = std::make_shared<Tensor>("");
  std::shared_ptr<Tensor> input_tensor;
  Tensor::CreateScalar<std::string>("", &input_tensor);
  input.push_back(input_tensor);
  Status s = op->Compute(input, &output);
  EXPECT_TRUE(s.IsOk());
@ -71,9 +71,9 @@ TEST_F(MindDataTestManifest, TestSequentialManifestWithRepeat) {
  di.GetNextAsMap(&tensor_map);
  EXPECT_TRUE(rc.IsOk());
  uint64_t i = 0;
  uint32_t label = 0;
  int32_t label = 0;
  while (tensor_map.size() != 0) {
    tensor_map["label"]->GetItemAt<uint32_t>(&label, {});
    tensor_map["label"]->GetItemAt<int32_t>(&label, {});
    EXPECT_TRUE(res[i] == label);
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
    i++;

@ -101,9 +101,9 @@ TEST_F(MindDataTestManifest, TestSubsetRandomSamplerManifest) {
  rc = di.GetNextAsMap(&tensor_map);
  EXPECT_TRUE(rc.IsOk());
  uint64_t i = 0;
  uint32_t label = 0;
  int32_t label = 0;
  while (tensor_map.size() != 0) {
    tensor_map["label"]->GetItemAt<uint32_t>(&label, {});
    tensor_map["label"]->GetItemAt<int32_t>(&label, {});
    i++;
    di.GetNextAsMap(&tensor_map);
    EXPECT_EQ(label, 1);

@ -131,9 +131,9 @@ TEST_F(MindDataTestManifest, MindDataTestManifestClassIndex) {
  di.GetNextAsMap(&tensor_map);
  EXPECT_TRUE(rc.IsOk());
  uint64_t i = 0;
  uint32_t label = 0;
  int32_t label = 0;
  while (tensor_map.size() != 0) {
    tensor_map["label"]->GetItemAt<uint32_t>(&label, {});
    tensor_map["label"]->GetItemAt<int32_t>(&label, {});
    EXPECT_TRUE(label == res[i]);
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
    i++;

@ -160,9 +160,9 @@ TEST_F(MindDataTestManifest, MindDataTestManifestNumSamples) {
  di.GetNextAsMap(&tensor_map);
  EXPECT_TRUE(rc.IsOk());
  uint64_t i = 0;
  uint32_t label = 0;
  int32_t label = 0;
  while (tensor_map.size() != 0) {
    tensor_map["label"]->GetItemAt<uint32_t>(&label, {});
    tensor_map["label"]->GetItemAt<int32_t>(&label, {});
    EXPECT_TRUE(0 == label);
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
    i++;

@ -176,7 +176,7 @@ TEST_F(MindDataTestManifest, MindDataTestManifestEval) {
  std::string file = datasets_root_path_ + "/testManifestData/cpp.json";
  int64_t num_samples = 1;
  int64_t start_index = 0;
  auto seq_sampler = std::make_shared<SequentialSampler>(num_samples, start_index);
  auto tree = Build({Manifest(16, 2, 32, file, "eval", std::move(seq_sampler), {})});
  tree->Prepare();
  Status rc = tree->Launch();

@ -189,9 +189,9 @@ TEST_F(MindDataTestManifest, MindDataTestManifestEval) {
  di.GetNextAsMap(&tensor_map);
  EXPECT_TRUE(rc.IsOk());
  uint64_t i = 0;
  uint32_t label = 0;
  int32_t label = 0;
  while (tensor_map.size() != 0) {
    tensor_map["label"]->GetItemAt<uint32_t>(&label, {});
    tensor_map["label"]->GetItemAt<int32_t>(&label, {});
    EXPECT_TRUE(0 == label);
    MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "\n";
    i++;
@ -38,9 +38,9 @@ class MindDataTestMaskOp : public UT::Common {

TEST_F(MindDataTestMaskOp, Basics) {
  std::shared_ptr<Tensor> t;
  Tensor::CreateTensor(&t, std::vector<uint32_t>({1, 2, 3, 4, 5, 6}));
  Tensor::CreateFromVector(std::vector<uint32_t>({1, 2, 3, 4, 5, 6}), &t);
  std::shared_ptr<Tensor> v;
  Tensor::CreateTensor(&v, std::vector<uint32_t>({3}), TensorShape::CreateScalar());
  Tensor::CreateFromVector(std::vector<uint32_t>({3}), TensorShape::CreateScalar(), &v);
  std::shared_ptr<MaskOp> op = std::make_shared<MaskOp>(RelationalOp::kEqual, v, DataType(DataType::DE_UINT16));
  std::shared_ptr<Tensor> out;
  ASSERT_TRUE(op->Compute(t, &out).IsOk());
@ -29,19 +29,17 @@ class MindDataTestOneHotOp : public UT::Common {

TEST_F(MindDataTestOneHotOp, TestOp) {
  MS_LOG(INFO) << "Doing MindDataTestOneHotOp.";
  uint64_t labels[3] = {0, 1, 2};
  TensorShape shape({3});
  std::shared_ptr<Tensor> input = std::make_shared<Tensor>(shape, DataType(DataType::DE_UINT64),
                                                           reinterpret_cast<unsigned char *>(labels));
  std::vector<uint64_t> labels = {0, 1, 2};
  std::shared_ptr<Tensor> input;
  Tensor::CreateFromVector(labels, &input);
  std::shared_ptr<Tensor> output;

  std::unique_ptr<OneHotOp> op(new OneHotOp(5));
  Status s = op->Compute(input, &output);
  uint64_t out[15] = {1, 0, 0, 0, 0,
                      0, 1, 0, 0, 0,
                      0, 0, 1, 0, 0};
  std::shared_ptr<Tensor> expected = std::make_shared<Tensor>(TensorShape{3, 5}, DataType(DataType::DE_UINT64),
                                                              reinterpret_cast<unsigned char *>(out));
  std::vector<uint64_t> out = {1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0};
  std::shared_ptr<Tensor> expected;
  Tensor::CreateFromVector(out, TensorShape{3, 5}, &expected);

  EXPECT_TRUE(s.IsOk());
  ASSERT_TRUE(output->shape() == expected->shape());
  ASSERT_TRUE(output->type() == expected->type());
@@ -35,44 +35,40 @@ TEST_F(MindDataTestPadEndOp, TestOp) {
TensorShape pad_data_shape({1});

// prepare input tensor
float_t orig1[4] = {1, 1, 1, 1};
std::vector<float> orig1 = {1, 1, 1, 1};
TensorShape input_shape1({2, 2});
std::vector<TensorShape> input_shape1_vector = {input_shape1};
std::shared_ptr<Tensor> input1 =
std::make_shared<Tensor>(input_shape1, DataType(DataType::DE_FLOAT32), reinterpret_cast<unsigned char *>(orig1));
std::shared_ptr<Tensor> input1;
Tensor::CreateFromVector(orig1, input_shape1, &input1);

// pad_shape
TensorShape pad_shape1[3] = {TensorShape({3, 3}), TensorShape({2, 4}), TensorShape({4, 2})};

// value to pad
float_t pad_data1[3][1] = {0, 3.5, 3.5};
std::vector<std::vector<float>> pad_data1 = {{0}, {3.5}, {3.5}};

std::shared_ptr<Tensor> expected1[3];

// expected tensor output for testunit 1
float_t out1[9] = {1, 1, 0, 1, 1, 0, 0, 0, 0};

expected1[0] =
std::make_shared<Tensor>(pad_shape1[0], DataType(DataType::DE_FLOAT32), reinterpret_cast<unsigned char *>(out1));
std::vector<float> out1 = {1, 1, 0, 1, 1, 0, 0, 0, 0};
Tensor::CreateFromVector(out1, pad_shape1[0], &(expected1[0]));

// expected tensor output for testunit 2
float_t out2[8] = {1, 1, 3.5, 3.5, 1, 1, 3.5, 3.5};

expected1[1] =
std::make_shared<Tensor>(pad_shape1[1], DataType(DataType::DE_FLOAT32), reinterpret_cast<unsigned char *>(out2));
std::vector<float> out2 = {1, 1, 3.5, 3.5, 1, 1, 3.5, 3.5};
Tensor::CreateFromVector(out2, pad_shape1[1], &(expected1[1]));

// expected tensor output for testunit 3
float_t out3[8] = {1, 1, 1, 1, 3.5, 3.5, 3.5, 3.5};

expected1[2] =
std::make_shared<Tensor>(pad_shape1[2], DataType(DataType::DE_FLOAT32), reinterpret_cast<unsigned char *>(out3));
std::vector<float> out3 = {1, 1, 1, 1, 3.5, 3.5, 3.5, 3.5};
Tensor::CreateFromVector(out3, pad_shape1[2], &(expected1[2]));

// run the PadEndOp
for (auto i = 0; i < 3; i++) {
std::shared_ptr<Tensor> output;
std::vector<TensorShape> output_shape = {TensorShape({})};
std::shared_ptr<Tensor> pad_value1 = std::make_shared<Tensor>(pad_data_shape, DataType(DataType::DE_FLOAT32),
reinterpret_cast<unsigned char *>(pad_data1[i]));

std::shared_ptr<Tensor> pad_value1;
Tensor::CreateFromVector(pad_data1[i], pad_data_shape, &pad_value1);

std::unique_ptr<PadEndOp> op(new PadEndOp(pad_shape1[i], pad_value1));
Status s = op->Compute(input1, &output);

@@ -96,7 +92,7 @@ TEST_F(MindDataTestPadEndOp, TestOp) {
TensorShape input_shape2({2});
std::vector<TensorShape> input_shape2_vector = {input_shape2};
std::shared_ptr<Tensor> input2;
Tensor::CreateTensor(&input2, orig2, input_shape2);
Tensor::CreateFromVector(orig2, input_shape2, &input2);

// pad_shape
TensorShape pad_shape2[3] = {TensorShape({5}), TensorShape({2}), TensorShape({10})};

@@ -112,7 +108,7 @@ TEST_F(MindDataTestPadEndOp, TestOp) {

for (auto i = 0; i < 3; i++) {
// pad value
Tensor::CreateTensor(&pad_value2[i], pad_data2[i], pad_data_shape);
Tensor::CreateFromVector(pad_data2[i], pad_data_shape, &pad_value2[i]);

std::shared_ptr<Tensor> output;
std::vector<TensorShape> output_shape = {TensorShape({})};

@@ -121,7 +117,7 @@ TEST_F(MindDataTestPadEndOp, TestOp) {

Status s = op->Compute(input2, &output);

Tensor::CreateTensor(&expected2[i], outstring[i], pad_shape2[i]);
Tensor::CreateFromVector(outstring[i], pad_shape2[i], &expected2[i]);

EXPECT_TRUE(s.IsOk());
ASSERT_TRUE(output->shape() == expected2[i]->shape());

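The PadEnd hunks rely on the shaped overload: a flat vector plus an explicit `TensorShape` replaces the raw-buffer constructor and its `reinterpret_cast`. A hedged sketch; whether the factory rejects a vector whose length does not match the shape is not shown in this diff, but the `Status` return gives it a place to report that:

    // Sketch: a 2x2 float tensor from a flat four-element vector.
    std::vector<float> data = {1, 1, 1, 1};
    std::shared_ptr<Tensor> input;
    Status rc = Tensor::CreateFromVector(data, TensorShape({2, 2}), &input);
    // rc should be checked before input is used.
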
@@ -93,7 +93,6 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromDatasetFuntions) {
rc = di.FetchNextTensorRow(&tensor_list);
}
ASSERT_TRUE(rc.IsOk());

}

TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromFileFuntions) {

@@ -166,9 +165,10 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceTokenizerFuntions) {
rc = di.FetchNextTensorRow(&tensor_list);
}
std::shared_ptr<Tensor> output_tensor;
std::unique_ptr<SentencePieceTokenizerOp> op(new SentencePieceTokenizerOp(spm,
SPieceTokenizerLoadType::kModel, SPieceTokenizerOutType::kString));
std::shared_ptr<Tensor> input_tensor = std::make_shared<Tensor>("I saw a girl with a telescope.");
std::unique_ptr<SentencePieceTokenizerOp> op(
new SentencePieceTokenizerOp(spm, SPieceTokenizerLoadType::kModel, SPieceTokenizerOutType::kString));
std::shared_ptr<Tensor> input_tensor;
Tensor::CreateScalar<std::string>("I saw a girl with a telescope.", &input_tensor);
Status s = op->Compute(input_tensor, &output_tensor);

std::vector<std::string> expect;

@@ -31,15 +31,17 @@ TEST_F(MindDataTestSlidingWindowOp, Compute) {
MS_LOG(INFO) << "Doing MindDataTestSlidingWindowOp->Compute.";
std::vector<std::string> strings = {"one", "two", "three", "four", "five", "six", "seven", "eight"};
TensorShape shape({static_cast<dsize_t>(strings.size())});
std::shared_ptr<Tensor> input = std::make_shared<Tensor>(strings, shape);
std::shared_ptr<Tensor> input;
Tensor::CreateFromVector(strings, shape, &input);
std::shared_ptr<Tensor> output;

std::unique_ptr<SlidingWindowOp> op(new SlidingWindowOp(3, 0));
Status s = op->Compute(input, &output);

std::vector<std::string> out = {"one", "two", "three", "two", "three", "four", "three", "four", "five",
                                "four", "five", "six", "five", "six", "seven", "six", "seven", "eight"};
std::shared_ptr<Tensor> expected = std::make_shared<Tensor>(out, TensorShape({6, 3}));
std::shared_ptr<Tensor> expected;
Tensor::CreateFromVector(out, TensorShape({6, 3}), &expected);

ASSERT_TRUE(output->shape() == expected->shape());
ASSERT_TRUE(output->type() == expected->type());

@@ -54,7 +56,8 @@ TEST_F(MindDataTestSlidingWindowOp, OutputShape) {
MS_LOG(INFO) << "Doing MindDataTestSlidingWindowOp->OutputShape.";
std::vector<std::string> strings = {"one", "two", "three", "four", "five", "six", "seven", "eight"};
TensorShape shape({static_cast<dsize_t>(strings.size())});
std::shared_ptr<Tensor> input = std::make_shared<Tensor>(strings, shape);
std::shared_ptr<Tensor> input;
Tensor::CreateFromVector(strings, shape, &input);
std::vector<TensorShape> input_shape = {input->shape()};
std::vector<TensorShape> output_shape = {TensorShape({})};

@@ -30,8 +30,7 @@ using namespace mindspore::dataset;

Status CreateINT64Tensor(std::shared_ptr<Tensor> *sample_ids, int64_t num_elements, unsigned char *data = nullptr) {
TensorShape shape(std::vector<int64_t>(1, num_elements));
RETURN_IF_NOT_OK(Tensor::CreateTensor(sample_ids, TensorImpl::kFlexible, shape, DataType(DataType::DE_INT64), data));
(*sample_ids)->AllocateBuffer((*sample_ids)->SizeInBytes());  // allocate memory in case user forgets!
RETURN_IF_NOT_OK(Tensor::CreateFromMemory(shape, DataType(DataType::DE_INT64), data, sample_ids));

return Status::OK();
}

@@ -54,8 +53,7 @@ TEST_F(MindDataTestStandAloneSampler, TestDistributedSampler) {
{0, 17, 4, 10, 14, 8, 15}, {13, 9, 16, 3, 2, 19, 12}, {1, 11, 6, 18, 7, 5, 0}};
for (int i = 0; i < 6; i++) {
std::shared_ptr<Tensor> t;
Tensor::CreateTensor(&t, TensorImpl::kFlexible, TensorShape({7}),
DataType(DataType::DE_INT64), (unsigned char *)(res[i]));
Tensor::CreateFromMemory(TensorShape({7}), DataType(DataType::DE_INT64), (unsigned char *)(res[i]), &t);
row.push_back(t);
}
MockStorageOp mock(20);

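`CreateFromMemory` folds the old `TensorImpl::kFlexible` overload and the follow-up `AllocateBuffer` call into one factory that takes the shape, the type, and a raw byte pointer. A sketch; whether the bytes are copied or aliased is not established by this diff:

    // Sketch: an INT64 tensor over seven existing values.
    int64_t ids[7] = {0, 17, 4, 10, 14, 8, 15};
    std::shared_ptr<Tensor> t;
    Status rc = Tensor::CreateFromMemory(TensorShape({7}), DataType(DataType::DE_INT64),
                                         reinterpret_cast<unsigned char *>(ids), &t);

Passing `nullptr` as the data pointer is exercised by the TensorEmpty test further down and yields a buffer-less tensor.
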
@@ -35,13 +35,15 @@ class MindDataTestStringTensorDE : public UT::Common {
};

TEST_F(MindDataTestStringTensorDE, Basics) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>("Hi");
std::shared_ptr<Tensor> t;
Tensor::CreateScalar<std::string>("Hi", &t);
ASSERT_TRUE(t->shape() == TensorShape({}));
std::string_view s = "";
t->GetItemAt(&s, {});
ASSERT_TRUE(s == "Hi");

std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(std::vector<std::string>{"Hi", "Bye"});
std::shared_ptr<Tensor> t2;
Tensor::CreateFromVector(std::vector<std::string>{"Hi", "Bye"}, &t2);
ASSERT_TRUE(t2->shape() == TensorShape({2}));
t2->GetItemAt(&s, {0});
ASSERT_TRUE(s == "Hi");

@@ -49,7 +51,9 @@ TEST_F(MindDataTestStringTensorDE, Basics) {
ASSERT_TRUE(s == "Bye");

std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"};
std::shared_ptr<Tensor> t3 = std::make_shared<Tensor>(strings, TensorShape({2, 3}));
std::shared_ptr<Tensor> t3;
Tensor::CreateFromVector(strings, TensorShape({2, 3}), &t3);

ASSERT_TRUE(t3->shape() == TensorShape({2, 3}));
uint32_t index = 0;
for (uint32_t i = 0; i < 2; i++) {

@@ -62,8 +66,10 @@ TEST_F(MindDataTestStringTensorDE, Basics) {
}

TEST_F(MindDataTestStringTensorDE, Basics2) {
std::shared_ptr<Tensor> t =
std::make_shared<Tensor>(std::vector<std::string>{"abc", "defg", "hi", "klmno", "123", "789"}, TensorShape({2, 3}));
std::shared_ptr<Tensor> t;
Tensor::CreateFromVector(std::vector<std::string>{"abc", "defg", "hi", "klmno", "123", "789"}, TensorShape({2, 3}),
&t);

ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 20 + 4);
std::vector<uint32_t> offsets = {0, 4, 9, 12, 18, 22, 26};
uint32_t ctr = 0;

@@ -86,7 +92,8 @@ TEST_F(MindDataTestStringTensorDE, Basics2) {

TEST_F(MindDataTestStringTensorDE, Empty) {
std::vector<std::string> strings{"abc", "defg", "", "", "123", ""};
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(strings, TensorShape({2, 3}));
std::shared_ptr<Tensor> t;
Tensor::CreateFromVector(strings, TensorShape({2, 3}), &t);
// abc_defg___123__
// 0123456789012345
ASSERT_TRUE(t->SizeInBytes() == 6 * 5 + 10 + 4);

@@ -112,7 +119,9 @@ TEST_F(MindDataTestStringTensorDE, Empty) {

TEST_F(MindDataTestStringTensorDE, SetItem) {
std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"};
std::shared_ptr<Tensor> t3 = std::make_shared<Tensor>(strings, TensorShape({2, 3}));
std::shared_ptr<Tensor> t3;
Tensor::CreateFromVector(strings, TensorShape({2, 3}), &t3);

ASSERT_TRUE(t3->shape() == TensorShape({2, 3}));

t3->SetItemAt({0, 1}, std::string{"xyzz"});

@@ -136,7 +145,8 @@ TEST_F(MindDataTestStringTensorDE, SetItem) {

TEST_F(MindDataTestStringTensorDE, Iterator) {
std::vector<std::string> strings{"abc", "defg", "hi", "klmno", "123", "789"};
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(strings, TensorShape({2, 3}));
std::shared_ptr<Tensor> t;
Tensor::CreateFromVector(strings, TensorShape({2, 3}), &t);
uint32_t index = 0;
auto itr = t->begin<std::string_view>();
for (; itr != t->end<std::string_view>(); itr++) {

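String tensors follow the same scheme: a scalar string now goes through `CreateScalar<std::string>` and is read back as a `std::string_view`, which views the tensor's packed string buffer instead of copying out of it. Condensed from the Basics test above:

    // Sketch: scalar string round-trip.
    std::shared_ptr<Tensor> t;
    Status rc = Tensor::CreateScalar<std::string>("Hi", &t);  // shape is TensorShape({})
    std::string_view v;
    rc = t->GetItemAt(&v, {});  // v now views "Hi" inside the tensor
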
@@ -35,8 +35,9 @@ class MindDataTestTensorDE : public UT::Common {
};

TEST_F(MindDataTestTensorDE, Basics) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 3}), DataType(DataType::DE_UINT64));
ASSERT_TRUE((t->AllocateBuffer(t->SizeInBytes())).IsOk());
std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2, 3}), DataType(DataType::DE_UINT64), &t);

ASSERT_EQ(t->shape(), TensorShape({2, 3}));
ASSERT_EQ(t->type(), DataType::DE_UINT64);
ASSERT_EQ(t->SizeInBytes(), 2 * 3 * 8);

@@ -67,28 +68,30 @@ TEST_F(MindDataTestTensorDE, Basics) {
ASSERT_EQ(t->ToString(), "Tensor (shape: <2,3>, Type: uint64)\n[[1,2,3],[4,5,6]]");
std::vector<uint64_t> x = {1, 2, 3, 4, 5, 6};
std::shared_ptr<Tensor> t2;
Tensor::CreateTensor(&t2, x, TensorShape({2, 3}));
Tensor::CreateFromVector(x, TensorShape({2, 3}), &t2);

ASSERT_EQ(*t == *t2, true);
ASSERT_EQ(*t != *t2, false);
}

TEST_F(MindDataTestTensorDE, Fill) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_FLOAT32));
std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_FLOAT32), &t);
t->Fill<float>(2.5);
std::vector<float> x = {2.5, 2.5, 2.5, 2.5};
std::shared_ptr<Tensor> t2;
Tensor::CreateTensor(&t2, x, TensorShape({2, 2}));
Tensor::CreateFromVector(x, TensorShape({2, 2}), &t2);
ASSERT_EQ(*t == *t2, true);
}

TEST_F(MindDataTestTensorDE, Reshape) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_UINT8));
std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_UINT8), &t);
t->Fill<uint8_t>(254);
t->Reshape(TensorShape({4}));
std::vector<uint8_t> x = {254, 254, 254, 254};
std::shared_ptr<Tensor> t2;
Tensor::CreateTensor(&t2, x);
Tensor::CreateFromVector(x, &t2);

ASSERT_EQ(*t == *t2, true);
Status rc = t->Reshape(TensorShape({5}));

@@ -102,7 +105,8 @@ TEST_F(MindDataTestTensorDE, Reshape) {
}

TEST_F(MindDataTestTensorDE, CopyTensor) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({}), DataType(DataType::DE_INT16));
std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({}), DataType(DataType::DE_INT16), &t);
t->SetItemAt<int16_t>({}, -66);
ASSERT_EQ(t->shape(), TensorShape({}));
ASSERT_EQ(t->type(), DataType::DE_INT16);

@@ -125,30 +129,31 @@ TEST_F(MindDataTestTensorDE, CopyTensor) {
}

TEST_F(MindDataTestTensorDE, InsertTensor) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 3}), DataType(DataType::DE_FLOAT64));
std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2, 3}), DataType(DataType::DE_FLOAT64), &t);
std::vector<double> x = {1.1, 2.1, 3.1};
std::shared_ptr<Tensor> t2;
Tensor::CreateTensor(&t2, x);
Tensor::CreateFromVector(x, &t2);

std::vector<double> y = {1.2, 2.2, 3.2};
std::shared_ptr<Tensor> t3;
Tensor::CreateTensor(&t3, y);
Tensor::CreateFromVector(y, &t3);

ASSERT_TRUE(t->InsertTensor({0}, t2).OK());
ASSERT_TRUE(t->InsertTensor({1}, t3).OK());
std::vector<double> z = {1.1, 2.1, 3.1, 1.2, 2.2, 3.2};

std::shared_ptr<Tensor> t4;
Tensor::CreateTensor(&t4, z, TensorShape({2, 3}));
Tensor::CreateFromVector(z, TensorShape({2, 3}), &t4);
ASSERT_EQ(*t == *t4, true);

std::shared_ptr<Tensor> t5;
Tensor::CreateTensor<double>(&t5, 0);
Tensor::CreateScalar<double>(0, &t5);

ASSERT_TRUE(t->InsertTensor({1, 2}, t5).OK());
z[5] = 0;
std::shared_ptr<Tensor> t6;
Tensor::CreateTensor(&t6, z, TensorShape({2, 3}));
Tensor::CreateFromVector(z, TensorShape({2, 3}), &t6);

ASSERT_EQ(*t == *t6, true);
ASSERT_EQ(t->InsertTensor({2}, t5).get_code(), StatusCode::kUnexpectedError);

@@ -161,7 +166,8 @@ TEST_F(MindDataTestTensorDE, InsertTensor) {

// Test the bug of Tensor::ToString will exec failed for Tensor which store bool values
TEST_F(MindDataTestTensorDE, BoolTensor) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2}), DataType(DataType::DE_BOOL));
std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2}), DataType(DataType::DE_BOOL), &t);
t->SetItemAt<bool>({0}, true);
t->SetItemAt<bool>({1}, true);
std::string out = t->ToString();

@@ -169,7 +175,8 @@ TEST_F(MindDataTestTensorDE, BoolTensor) {
}

TEST_F(MindDataTestTensorDE, GetItemAt) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_UINT8));
std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_UINT8), &t);
t->Fill<uint8_t>(254);
uint64_t o1;
t->GetItemAt<uint64_t>(&o1, {0, 0});

@@ -183,7 +190,8 @@ TEST_F(MindDataTestTensorDE, GetItemAt) {
uint8_t o4;
t->GetItemAt<uint8_t>(&o4, {1, 1});
ASSERT_EQ(o4, 254);
std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_INT8));
std::shared_ptr<Tensor> t2;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_INT8), &t2);
t2->Fill<int8_t>(-10);
int64_t o5;
t2->GetItemAt<int64_t>(&o5, {0, 0});

@@ -197,7 +205,8 @@ TEST_F(MindDataTestTensorDE, GetItemAt) {
int8_t o8;
t2->GetItemAt<int8_t>(&o8, {1, 1});
ASSERT_EQ(o8, -10);
std::shared_ptr<Tensor> t3 = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_FLOAT32));
std::shared_ptr<Tensor> t3;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_FLOAT32), &t3);
t3->Fill<float>(1.1);
double o9;
t3->GetItemAt<double>(&o9, {0, 0});

@@ -208,9 +217,11 @@ TEST_F(MindDataTestTensorDE, GetItemAt) {
}

TEST_F(MindDataTestTensorDE, OperatorAssign) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_UINT8));
std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_UINT8), &t);
t->Fill<uint8_t>(1);
std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_UINT8));
std::shared_ptr<Tensor> t2;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_UINT8), &t2);
*t2 = std::move(*t);
uint8_t o;
t2->GetItemAt(&o, {0, 0});

@@ -224,18 +235,20 @@ TEST_F(MindDataTestTensorDE, OperatorAssign) {
}

TEST_F(MindDataTestTensorDE, Strides) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({4, 2, 2}), DataType(DataType::DE_UINT8));
std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({4, 2, 2}), DataType(DataType::DE_UINT8), &t);
std::vector<dsize_t> x1 = t->Strides();
std::vector<dsize_t> x2 = {4, 2, 1};
ASSERT_EQ(x1, x2);
t = std::make_shared<Tensor>(TensorShape({4, 2, 2}), DataType(DataType::DE_UINT32));
Tensor::CreateEmpty(TensorShape({4, 2, 2}), DataType(DataType::DE_UINT32), &t);
x1 = t->Strides();
x2 = {16, 8, 4};
ASSERT_EQ(x1, x2);
}

void checkCvMat(TensorShape shape, DataType type) {
std::shared_ptr<CVTensor> t = std::make_shared<CVTensor>(shape, type);
std::shared_ptr<CVTensor> t;
CVTensor::CreateEmpty(shape, type, &t);
cv::Mat m = t->mat();
ASSERT_EQ(m.data, t->GetBuffer());
ASSERT_EQ(static_cast<uchar>(m.type()) & static_cast<uchar>(CV_MAT_DEPTH_MASK), type.AsCVType());

@@ -289,8 +302,10 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) {
m.at<uint8_t>(0, 1) = 20;
m.at<uint8_t>(1, 0) = 30;
m.at<uint8_t>(1, 1) = 40;
std::shared_ptr<CVTensor> cvt = std::make_shared<CVTensor>(m);
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 2}), DataType(DataType::DE_UINT8));
std::shared_ptr<CVTensor> cvt;
CVTensor::CreateFromMat(m, &cvt);
std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({2, 2}), DataType(DataType::DE_UINT8), &t);
t->SetItemAt<uint8_t>({0, 0}, 10);
t->SetItemAt<uint8_t>({0, 1}, 20);
t->SetItemAt<uint8_t>({1, 0}, 30);

@@ -302,8 +317,10 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) {
m2.at<uint8_t>(1) = 20;
m2.at<uint8_t>(2) = 30;
m2.at<uint8_t>(3) = 40;
std::shared_ptr<CVTensor> cvt2 = std::make_shared<CVTensor>(m2);
std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(TensorShape({4}), DataType(DataType::DE_UINT8));
std::shared_ptr<CVTensor> cvt2;
CVTensor::CreateFromMat(m2, &cvt2);
std::shared_ptr<Tensor> t2;
Tensor::CreateEmpty(TensorShape({4}), DataType(DataType::DE_UINT8), &t2);
t2->SetItemAt<uint8_t>({0}, 10);
t2->SetItemAt<uint8_t>({1}, 20);
t2->SetItemAt<uint8_t>({2}, 30);

@@ -313,10 +330,12 @@ TEST_F(MindDataTestTensorDE, CVTensorFromMat) {
}

TEST_F(MindDataTestTensorDE, CVTensorAs) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64));
std::shared_ptr<Tensor> t;
Tensor::CreateEmpty(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64), &t);
t->Fill<double>(2.2);
const unsigned char *addr = t->GetBuffer();
std::shared_ptr<Tensor> t2 = std::make_shared<Tensor>(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64));
std::shared_ptr<Tensor> t2;
Tensor::CreateEmpty(TensorShape({3, 2}), DataType(DataType::DE_FLOAT64), &t2);
t2->Fill<double>(4.4);
std::shared_ptr<CVTensor> ctv = CVTensor::AsCVTensor(t);
ASSERT_EQ(t->GetBuffer(), nullptr);

@@ -326,6 +345,10 @@ TEST_F(MindDataTestTensorDE, CVTensorAs) {
ASSERT_EQ(ctv->GetBuffer(), addr);
ASSERT_TRUE(*t2 == *ctv);
MS_LOG(DEBUG) << *t2 << std::endl << *ctv;
cv::Mat m2 = ctv->matCopy();
m2 = 2 * m2;
ASSERT_EQ(ctv->GetBuffer(), addr);
ASSERT_TRUE(*t2 == *ctv);
}

TEST_F(MindDataTestTensorDE, CVTensorMatSlice) {

@@ -336,23 +359,26 @@ TEST_F(MindDataTestTensorDE, CVTensorMatSlice) {
m.at<int32_t>(1, 0) = 40;
m.at<int32_t>(1, 1) = 50;
m.at<int32_t>(1, 2) = 60;
std::shared_ptr<CVTensor> cvt = std::make_shared<CVTensor>(m);
std::shared_ptr<CVTensor> cvt;
CVTensor::CreateFromMat(m, &cvt);
cv::Mat mat;
cvt->Mat({1}, &mat);
cvt->MatAtIndex({1}, &mat);
cv::Mat m2(3, 1, CV_32S);
m2.at<int32_t>(0) = 40;
m2.at<int32_t>(1) = 50;
m2.at<int32_t>(2) = 60;
std::shared_ptr<CVTensor> cvt2 = std::make_shared<CVTensor>(mat);
std::shared_ptr<CVTensor> cvt3 = std::make_shared<CVTensor>(m2);
std::shared_ptr<CVTensor> cvt2;
CVTensor::CreateFromMat(mat, &cvt2);
std::shared_ptr<CVTensor> cvt3;
CVTensor::CreateFromMat(m2, &cvt3);

ASSERT_TRUE(*cvt2 == *cvt3);
cvt->Mat({0}, &mat);
cvt->MatAtIndex({0}, &mat);
m2.at<int32_t>(0) = 10;
m2.at<int32_t>(1) = 20;
m2.at<int32_t>(2) = 30;
cvt2 = std::make_shared<CVTensor>(mat);
cvt3 = std::make_shared<CVTensor>(m2);
CVTensor::CreateFromMat(mat, &cvt2);
CVTensor::CreateFromMat(m2, &cvt3);
ASSERT_TRUE(*cvt2 == *cvt3);
}

@@ -361,7 +387,7 @@ TEST_F(MindDataTestTensorDE, TensorIterator) {
std::vector<uint32_t> values2 = {2, 3, 4, 5, 6, 7};

std::shared_ptr<Tensor> t;
Tensor::CreateTensor(&t, values);
Tensor::CreateFromVector(values, &t);

auto i = t->begin<uint32_t>();
auto j = values.begin();

@@ -395,11 +421,11 @@ TEST_F(MindDataTestTensorDE, TensorIterator) {

TEST_F(MindDataTestTensorDE, TensorSlice) {
std::shared_ptr<Tensor> t;
Tensor::CreateTensor(&t, std::vector<dsize_t>{0, 1, 2, 3, 4});
Tensor::CreateFromVector(std::vector<dsize_t>{0, 1, 2, 3, 4}, &t);
std::shared_ptr<Tensor> t2;
auto x = std::vector<dsize_t>{0, 3, 4};
std::shared_ptr<Tensor> expected;
Tensor::CreateTensor(&expected, x);
Tensor::CreateFromVector(x, &expected);
t->Slice(&t2, x);
ASSERT_EQ(*t2, *expected);
t->Slice(&t2, std::vector<dsize_t>{0, 1, 2, 3, 4});

@@ -412,13 +438,13 @@ TEST_F(MindDataTestTensorDE, TensorConcatenate) {
std::vector<uint32_t> expected = {1, 2, 3, 4, 5, 6};

std::shared_ptr<Tensor> t1;
Tensor::CreateTensor(&t1, values1);
Tensor::CreateFromVector(values1, &t1);

std::shared_ptr<Tensor> t2;
Tensor::CreateTensor(&t2, values2);
Tensor::CreateFromVector(values2, &t2);

std::shared_ptr<Tensor> out;
Tensor::CreateTensor(&out, expected);
Tensor::CreateFromVector(expected, &out);
Status s = t1->Concatenate({3}, t2);
EXPECT_TRUE(s.IsOk());

@@ -434,15 +460,80 @@ TEST_F(MindDataTestTensorDE, TensorConcatenate) {
}

TEST_F(MindDataTestTensorDE, TensorEmpty) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({2, 3}), DataType(DataType::DE_UINT64));
ASSERT_TRUE(t->HasData());
}
TensorPtr t;
Status rc = Tensor::CreateEmpty(TensorShape({0}), DataType(DataType::DE_UINT64), &t);
ASSERT_TRUE(rc.IsOk());

TEST_F(MindDataTestTensorDE, TensorEmptyInvalidate) {
std::vector<uint32_t> values1 = {1, 2, 3, 0, 0, 0};
std::shared_ptr<Tensor> t;
Tensor::CreateTensor(&t, values1);
t->Invalidate();
ASSERT_TRUE(t->HasData());
}
ASSERT_EQ(t->shape(), TensorShape({0}));
ASSERT_EQ(t->type(), DataType::DE_UINT64);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
ASSERT_TRUE(!t->HasData());

rc = t->SetItemAt<uint64_t>({0}, 7);
ASSERT_TRUE(rc.IsError());

rc = Tensor::CreateEmpty(TensorShape({1, 0}), DataType(DataType::DE_STRING), &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({1, 0}));
ASSERT_EQ(t->type(), DataType::DE_STRING);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
ASSERT_TRUE(!t->HasData());

std::vector<uint16_t> data;
rc = Tensor::CreateFromVector(data, &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({0}));
ASSERT_EQ(t->type(), DataType::DE_UINT16);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
ASSERT_TRUE(!t->HasData());

std::vector<std::string> data2;
rc = Tensor::CreateFromVector(data2, &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({0}));
ASSERT_EQ(t->type(), DataType::DE_STRING);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
ASSERT_TRUE(!t->HasData());

rc = Tensor::CreateFromVector(data, TensorShape({0, 2}), &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({0, 2}));
ASSERT_EQ(t->type(), DataType::DE_UINT16);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
ASSERT_TRUE(!t->HasData());

rc = Tensor::CreateFromVector(data2, TensorShape({0, 0, 6}), &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({0, 0, 6}));
ASSERT_EQ(t->type(), DataType::DE_STRING);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
ASSERT_TRUE(!t->HasData());

rc = Tensor::CreateFromMemory(TensorShape({0}), DataType(DataType::DE_INT8), nullptr, &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({0}));
ASSERT_EQ(t->type(), DataType::DE_INT8);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);
ASSERT_TRUE(!t->HasData());

rc = Tensor::CreateFromMemory(TensorShape({0}), DataType(DataType::DE_STRING), nullptr, &t);
ASSERT_TRUE(rc.IsOk());
ASSERT_EQ(t->shape(), TensorShape({0}));
ASSERT_EQ(t->type(), DataType::DE_STRING);
ASSERT_EQ(t->SizeInBytes(), 0);
ASSERT_EQ(t->GetBuffer(), nullptr);

std::vector<uint32_t> values = {1, 2, 3, 0, 0, 0};
std::shared_ptr<Tensor> t2;
Tensor::CreateFromVector(values, &t2);
ASSERT_TRUE(t2->HasData());
t2->Invalidate();
ASSERT_TRUE(!t2->HasData());
}

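The rewritten TensorEmpty test pins down one contract for zero-element tensors regardless of which factory produced them. The invariants it asserts, condensed into a sketch rather than additional test code:

    // Any shape containing a zero dimension yields a valid, data-less tensor:
    //   SizeInBytes() == 0, GetBuffer() == nullptr, HasData() == false,
    // and element access such as SetItemAt returns an error Status.
    // Invalidate() moves a populated tensor into the same data-less state.
    std::shared_ptr<Tensor> t;
    Status rc = Tensor::CreateEmpty(TensorShape({0}), DataType(DataType::DE_UINT64), &t);
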
@@ -46,8 +46,8 @@ class MindDataTestTokenizerOp : public UT::Common {
TEST_F(MindDataTestTokenizerOp, TestUnicodeCharTokenizerOp) {
MS_LOG(INFO) << "Doing TestUnicodeCharTokenizerOp.";
std::unique_ptr<UnicodeCharTokenizerOp> op(new UnicodeCharTokenizerOp(true));
std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Hello World!");
TensorRow output;
std::shared_ptr<Tensor> input;
Tensor::CreateScalar<std::string>("Hello World!", &input);
TensorRow output;
Status s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 12);

@@ -66,7 +66,7 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeCharTokenizerOp) {
CheckEqual(output[0], {10}, "d");
CheckEqual(output[0], {11}, "!");

input = std::make_shared<Tensor>("中国 你好!");
Tensor::CreateScalar<std::string>("中国 你好!", &input);
output.clear();
s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());

@@ -80,38 +80,38 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeCharTokenizerOp) {
CheckEqual(output[0], {4}, "好");
CheckEqual(output[0], {5}, "!");

input = std::make_shared<Tensor>("中");
output.clear();
Tensor::CreateScalar<std::string>("中", &input);
output.clear();
s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString();
CheckEqual(output[0], {0}, "中");

input = std::make_shared<Tensor>("H");
output.clear();
Tensor::CreateScalar<std::string>("H", &input);
output.clear();
s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor4: " << output[0]->ToString();
CheckEqual(output[0], {0}, "H");

input = std::make_shared<Tensor>("  ");
output.clear();
Tensor::CreateScalar<std::string>("  ", &input);
output.clear();
s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 2);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString();
CheckEqual(output[0], {0}, " ");
CheckEqual(output[0], {1}, " ");

input = std::make_shared<Tensor>("");
output.clear();
Tensor::CreateScalar<std::string>("", &input);
output.clear();
s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor6: " << output[0]->ToString();

@@ -121,10 +121,10 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeCharTokenizerOp) {
TEST_F(MindDataTestTokenizerOp, TestWhitespaceTokenizerOp) {
MS_LOG(INFO) << "Doing TestWhitespaceTokenizerOp.";
std::unique_ptr<WhitespaceTokenizerOp> op(new WhitespaceTokenizerOp(true));
std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Welcome to China.");
TensorRow output;
std::shared_ptr<Tensor> input;
Tensor::CreateScalar<std::string>("Welcome to China.", &input);
TensorRow output;
Status s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 3);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor1: " << output[0]->ToString();

@@ -132,37 +132,37 @@ TEST_F(MindDataTestTokenizerOp, TestWhitespaceTokenizerOp) {
CheckEqual(output[0], {1}, "to");
CheckEqual(output[0], {2}, "China.");

input = std::make_shared<Tensor>(" hello");
output.clear();
Tensor::CreateScalar<std::string>(" hello", &input);
output.clear();
s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor2: " << output[0]->ToString();
CheckEqual(output[0], {0}, "hello");

input = std::make_shared<Tensor>("hello");
output.clear();
Tensor::CreateScalar<std::string>("hello", &input);
output.clear();
s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString();
CheckEqual(output[0], {0}, "hello");

input = std::make_shared<Tensor>("hello ");
output.clear();
Tensor::CreateScalar<std::string>("hello ", &input);
output.clear();
s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor4: " << output[0]->ToString();
CheckEqual(output[0], {0}, "hello");

input = std::make_shared<Tensor>(" ");
output.clear();
Tensor::CreateScalar<std::string>(" ", &input);
output.clear();
s = op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString();

@@ -174,8 +174,9 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) {
std::unique_ptr<UnicodeScriptTokenizerOp> keep_whitespace_op(new UnicodeScriptTokenizerOp(true, true));
std::unique_ptr<UnicodeScriptTokenizerOp> skip_whitespace_op(new UnicodeScriptTokenizerOp(false, true));

std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Welcome to China. \n 中国\t北京");
TensorRow output;
std::shared_ptr<Tensor> input;
Tensor::CreateScalar<std::string>("Welcome to China. \n 中国\t北京", &input);
TensorRow output;
Status s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 10);

@@ -204,10 +205,9 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) {
CheckEqual(output[0], {4}, "中国");
CheckEqual(output[0], {5}, "北京");

input = std::make_shared<Tensor>(" Welcome to 中国. ");
output.clear();
s = skip_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
Tensor::CreateScalar<std::string>(" Welcome to 中国. ", &input);
output.clear();
s = skip_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 4);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor3: " << output[0]->ToString();

@@ -230,25 +230,23 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) {
CheckEqual(output[0], {6}, ".");
CheckEqual(output[0], {7}, " ");

input = std::make_shared<Tensor>("Hello");
output.clear();
s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
Tensor::CreateScalar<std::string>("Hello", &input);
output.clear();
s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor5: " << output[0]->ToString();
CheckEqual(output[0], {0}, "Hello");

input = std::make_shared<Tensor>("H");
output.clear();
s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
Tensor::CreateScalar<std::string>("H", &input);
output.clear();
s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor6: " << output[0]->ToString();
CheckEqual(output[0], {0}, "H");

input = std::make_shared<Tensor>("");
Tensor::CreateScalar<std::string>("", &input);
output.clear();
s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());

@@ -257,10 +255,9 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) {
MS_LOG(INFO) << "Out tensor7: " << output[0]->ToString();
CheckEqual(output[0], {0}, "");

input = std::make_shared<Tensor>("Hello中国Hello世界");
output.clear();
s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 4);
Tensor::CreateScalar<std::string>("Hello中国Hello世界", &input);
output.clear();
s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 4);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor8: " << output[0]->ToString();
CheckEqual(output[0], {0}, "Hello");

@@ -268,15 +265,15 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) {
CheckEqual(output[0], {2}, "Hello");
CheckEqual(output[0], {3}, "世界");

input = std::make_shared<Tensor>(" ");
output.clear();
Tensor::CreateScalar<std::string>(" ", &input);
output.clear();
s = keep_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
EXPECT_EQ(output[0]->Size(), 1);
EXPECT_EQ(output[0]->Rank(), 1);
MS_LOG(INFO) << "Out tensor10: " << output[0]->ToString();
CheckEqual(output[0], {0}, " ");
input = std::make_shared<Tensor>(" ");
Tensor::CreateScalar<std::string>(" ", &input);
output.clear();
s = skip_whitespace_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());

@@ -289,7 +286,9 @@ TEST_F(MindDataTestTokenizerOp, TestUnicodeScriptTokenizer) {
TEST_F(MindDataTestTokenizerOp, TestCaseFold) {
MS_LOG(INFO) << "Doing TestCaseFold.";
std::unique_ptr<CaseFoldOp> case_fold_op(new CaseFoldOp());
std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Welcome to China. \n 中国\t北京");
std::shared_ptr<Tensor> input;
Tensor::CreateScalar<std::string>("Welcome to China. \n 中国\t北京", &input);

std::shared_ptr<Tensor> output;
Status s = case_fold_op->Compute(input, &output);
EXPECT_TRUE(s.IsOk());

@@ -305,7 +304,8 @@ TEST_F(MindDataTestTokenizerOp, TestNormalize) {
std::unique_ptr<NormalizeUTF8Op> nfkc_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfkc));
std::unique_ptr<NormalizeUTF8Op> nfd_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfd));
std::unique_ptr<NormalizeUTF8Op> nfkd_normalize_op(new NormalizeUTF8Op(NormalizeForm::kNfkd));
std::shared_ptr<Tensor> input = std::make_shared<Tensor>("ṩ");
std::shared_ptr<Tensor> input;
Tensor::CreateScalar<std::string>("ṩ", &input);
std::shared_ptr<Tensor> output;
Status s = nfc_normalize_op->Compute(input, &output);
EXPECT_TRUE(s.IsOk());

@@ -327,7 +327,8 @@ TEST_F(MindDataTestTokenizerOp, TestNormalize) {
TEST_F(MindDataTestTokenizerOp, TestRegexReplace) {
MS_LOG(INFO) << "Doing TestRegexReplace.";
std::unique_ptr<RegexReplaceOp> regex_replace_op(new RegexReplaceOp("\\s+", "_", true));
std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Welcome to China. \n 中国\t北京");
std::shared_ptr<Tensor> input;
Tensor::CreateScalar<std::string>("Welcome to China. \n 中国\t北京", &input);
std::shared_ptr<Tensor> output;
Status s = regex_replace_op->Compute(input, &output);
EXPECT_TRUE(s.IsOk());

@@ -340,19 +341,20 @@ TEST_F(MindDataTestTokenizerOp, TestRegexReplace) {
TEST_F(MindDataTestTokenizerOp, TestRegexTokenizer) {
MS_LOG(INFO) << "Doing TestRegexTokenizerOp.";
std::unique_ptr<RegexTokenizerOp> regex_tokenizer_op(new RegexTokenizerOp("\\p{Cc}|\\p{Cf}|\\s+", "", true));
std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Welcome to China. \n 中国\t北京");
TensorRow output;
std::shared_ptr<Tensor> input;
Tensor::CreateScalar<std::string>("Welcome to China. \n 中国\t北京", &input);
TensorRow output;
Status s = regex_tokenizer_op->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());
}

TEST_F(MindDataTestTokenizerOp, TestBasicTokenizer) {
MS_LOG(INFO) << "Doing TestBasicTokenizer.";
//bool lower_case, bool keep_whitespace,
// bool lower_case, bool keep_whitespace,
// NormalizeForm normalization_form, bool preserve_unused_token
std::unique_ptr<BasicTokenizerOp> basic_tokenizer(new BasicTokenizerOp(true, true, NormalizeForm::kNone, false,
true));
std::shared_ptr<Tensor> input = std::make_shared<Tensor>("Welcome to China. 中国\t北京");
std::unique_ptr<BasicTokenizerOp> basic_tokenizer(new BasicTokenizerOp(true, true, NormalizeForm::kNone, false, true));
std::shared_ptr<Tensor> input;
Tensor::CreateScalar<std::string>("Welcome to China. 中国\t北京", &input);
TensorRow output;
Status s = basic_tokenizer->Compute(TensorRow(0, {input}), &output);
EXPECT_TRUE(s.IsOk());

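Every text-op test now prepares its input the same way: build a rank-0 string tensor, wrap it in a `TensorRow`, and hand it to `Compute`. A sketch, with `op` standing in for any of the tokenizer ops constructed above:

    // Sketch: scalar string in, tokenized TensorRow out.
    std::shared_ptr<Tensor> input;
    Tensor::CreateScalar<std::string>("Hello World!", &input);
    TensorRow output;
    Status s = op->Compute(TensorRow(0, {input}), &output);  // op: any tokenizer op
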
@@ -35,17 +35,17 @@ class MindDataTestTruncatePairOp : public UT::Common {

TEST_F(MindDataTestTruncatePairOp, Basics) {
std::shared_ptr<Tensor> t1;
Tensor::CreateTensor(&t1, std::vector<uint32_t>({1, 2, 3}));
Tensor::CreateFromVector(std::vector<uint32_t>({1, 2, 3}), &t1);
std::shared_ptr<Tensor> t2;
Tensor::CreateTensor(&t2, std::vector<uint32_t>({4, 5}));
Tensor::CreateFromVector(std::vector<uint32_t>({4, 5}), &t2);
TensorRow in({t1, t2});
std::shared_ptr<TruncateSequencePairOp> op = std::make_shared<TruncateSequencePairOp>(4);
TensorRow out;
ASSERT_TRUE(op->Compute(in, &out).IsOk());
std::shared_ptr<Tensor> out1;
Tensor::CreateTensor(&out1, std::vector<uint32_t>({1, 2}));
Tensor::CreateFromVector(std::vector<uint32_t>({1, 2}), &out1);
std::shared_ptr<Tensor> out2;
Tensor::CreateTensor(&out2, std::vector<uint32_t>({4, 5}));
Tensor::CreateFromVector(std::vector<uint32_t>({4, 5}), &out2);
ASSERT_EQ(*out1, *out[0]);
ASSERT_EQ(*out2, *out[1]);
}

@@ -43,16 +43,15 @@ class MindDataTestTypeCast : public UT::Common {

template<typename FROM, typename TO>
void testCast(std::vector<FROM> values, const DataType &from, const DataType &to) {
std::shared_ptr<Tensor> t = std::make_shared<Tensor>(TensorShape({static_cast<int64_t>(values.size())}),
DataType(from),
reinterpret_cast<unsigned char *>(&values[0]));
std::shared_ptr<Tensor> t;
Tensor::CreateFromVector(values, &t);

std::unique_ptr<TypeCastOp> op(new TypeCastOp(to));
EXPECT_TRUE(op->OneToOne());
std::shared_ptr<Tensor> output;
EXPECT_TRUE(op->Compute(t, &output));
ASSERT_TRUE(t->shape() == output->shape());
ASSERT_TRUE(DataType(to)==output->type());
ASSERT_TRUE(DataType(to) == output->type());
MS_LOG(DEBUG) << *output << std::endl;
auto out = output->begin<TO>();
auto v = values.begin();

@@ -16,7 +16,6 @@
Testing Mask op in DE
"""
import numpy as np
import pytest

import mindspore.dataset as ds
import mindspore.dataset.text as text

@@ -55,9 +54,7 @@ def test_basics_str():


def test_exceptions():
    with pytest.raises(RuntimeError) as info:
        compare(in1=[1, 2, 3, 4], in2=[5, 6, 7, 8], length=1, out1=[1, 2], out2=[5])
    assert "Indices are empty, generated tensor would be empty" in str(info.value)
    compare(in1=[1, 2, 3, 4], in2=[5, 6, 7, 8], length=1, out1=[1], out2=[])


if __name__ == "__main__":

@@ -121,21 +121,10 @@ def test_slice_exceptions():
        slice_compare([1, 2, 3, 4, 5], 5)
    assert "Index 5 is out of bounds [0,5)" in str(info.value)

    with pytest.raises(RuntimeError) as info:
        slice_compare([1, 2, 3, 4, 5], slice(0))
    assert "Indices are empty, generated tensor would be empty." in str(info.value)

    with pytest.raises(RuntimeError) as info:
        slice_compare([1, 2, 3, 4, 5], slice(3, 1, 1))
    assert "Indices are empty, generated tensor would be empty." in str(info.value)

    with pytest.raises(RuntimeError) as info:
        slice_compare([1, 2, 3, 4, 5], slice(5, 10, 1))
    assert "Indices are empty, generated tensor would be empty." in str(info.value)

    with pytest.raises(RuntimeError) as info:
        slice_compare([1, 2, 3, 4, 5], slice(-1, -5, 1))
    assert "Indices are empty, generated tensor would be empty." in str(info.value)
    slice_compare([1, 2, 3, 4, 5], slice(0))
    slice_compare([1, 2, 3, 4, 5], slice(3, 1, 1))
    slice_compare([1, 2, 3, 4, 5], slice(5, 10, 1))
    slice_compare([1, 2, 3, 4, 5], slice(-1, -5, 1))


def test_slice_all_str():

@@ -198,21 +187,10 @@ def test_slice_exceptions_str():
        slice_compare([b"1", b"2", b"3", b"4", b"5"], 5)
    assert "Index 5 is out of bounds [0,5)" in str(info.value)

    with pytest.raises(RuntimeError) as info:
        slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0))
    assert "Indices are empty, generated tensor would be empty." in str(info.value)

    with pytest.raises(RuntimeError) as info:
        slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(3, 1, 1))
    assert "Indices are empty, generated tensor would be empty." in str(info.value)

    with pytest.raises(RuntimeError) as info:
        slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(5, 10, 1))
    assert "Indices are empty, generated tensor would be empty." in str(info.value)

    with pytest.raises(RuntimeError) as info:
        slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1, -5, 1))
    assert "Indices are empty, generated tensor would be empty." in str(info.value)
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(0))
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(3, 1, 1))
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(5, 10, 1))
    slice_compare([b"1", b"2", b"3", b"4", b"5"], slice(-1, -5, 1))


if __name__ == "__main__":

@@ -0,0 +1,72 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
import numpy as np

import mindspore.dataset as ds


def test_tensor_empty():
    def gen():
        for _ in range(4):
            (yield np.array([], dtype=np.int64), np.array([], dtype='S').reshape([0, 4]),
             np.array([1], dtype=np.float64))

    data = ds.GeneratorDataset(gen, column_names=["col1", "col2", "col3"])

    for d in data:
        np.testing.assert_array_equal(np.array([], dtype=np.int64), d[0])
        np.testing.assert_array_equal(np.array([], dtype='S').reshape([0, 4]), d[1])
        np.testing.assert_array_equal(np.array([1], dtype=np.float64), d[2])


def test_tensor_empty_map():
    def gen():
        for _ in range(4):
            (yield np.array([], dtype=np.int64), np.array([], dtype='S'), np.array([1], dtype=np.float64))

    data = ds.GeneratorDataset(gen, column_names=["col1", "col2", "col3"])

    def func(x, y, z):
        x = np.array([1], dtype=np.int64)
        y = np.array(["Hi"], dtype='S')
        z = np.array([], dtype=np.float64)
        return x, y, z

    data = data.map(input_columns=["col1", "col2", "col3"], operations=func)

    for d in data:
        np.testing.assert_array_equal(np.array([1], dtype=np.int64), d[0])
        np.testing.assert_array_equal(np.array(["Hi"], dtype='S'), d[1])
        np.testing.assert_array_equal(np.array([], dtype=np.float64), d[2])


def test_tensor_empty_batch():
    def gen():
        for _ in range(4):
            (yield np.array([], dtype=np.int64), np.array([], dtype='S').reshape([0, 4]),
             np.array([1], dtype=np.float64))

    data = ds.GeneratorDataset(gen, column_names=["col1", "col2", "col3"]).batch(2)

    for d in data:
        np.testing.assert_array_equal(np.array([], dtype=np.int64).reshape([2, 0]), d[0])
        np.testing.assert_array_equal(np.array([], dtype='S').reshape([2, 0, 4]), d[1])
        np.testing.assert_array_equal(np.array([[1], [1]], dtype=np.float64), d[2])


if __name__ == '__main__':
    test_tensor_empty()
    test_tensor_empty_map()
    test_tensor_empty_batch()