diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor.cc b/mindspore/ccsrc/minddata/dataset/core/tensor.cc index fd81a90b4b8..2c7bbb5b51e 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor.cc +++ b/mindspore/ccsrc/minddata/dataset/core/tensor.cc @@ -526,16 +526,34 @@ Status Tensor::StartAddrOfIndex(std::vector ind, uchar **start_addr_of_ return Status::OK(); } -Status Tensor::InsertTensor(const std::vector &ind, const std::shared_ptr &tensor) { +Status Tensor::InsertTensor(const std::vector &ind, const std::shared_ptr &tensor, + const bool partial_insert) { std::string err_msg; - err_msg += (this->type() == DataType::DE_STRING) ? "[Tensor] Cannot batch tensors of type string\n" : ""; - err_msg += (!this->shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : ""; - err_msg += (ind.size() + tensor->Rank() != this->Rank()) ? "[Tensor] incorrect index\n" : ""; - err_msg += tensor->type().SizeInBytes() != this->type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : ""; + if (partial_insert) { + err_msg += (ind.size() != 1) + ? "[Tensor] only supports 1D insertion of elements not along the full length of the axis\n" + : ""; + err_msg += + (ind.at(0) + tensor->shape().NumOfElements() > shape().NumOfElements()) ? "[Tensor] incorrect index\n" : ""; + } else { + err_msg += (ind.size() + tensor->Rank() != Rank()) ? "[Tensor] incorrect index\n" : ""; + } + err_msg += (type() == DataType::DE_STRING) ? "[Tensor] Cannot insert into a tensor of type string\n" : ""; + err_msg += (!shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : ""; + + err_msg += tensor->type().SizeInBytes() != type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : ""; uchar *start_addr_of_ind = nullptr; - TensorShape remaining_shape = TensorShape::CreateUnknownRankShape(); - err_msg += (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : ""; - err_msg += !(remaining_shape == tensor->shape()) ? "[Tensor] memory error\n" : ""; + if (partial_insert) { + TensorShape remaining_shape = tensor->shape(); + err_msg += + (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : ""; + } else { + TensorShape remaining_shape = TensorShape::CreateUnknownRankShape(); + err_msg += + (!StartAddrOfIndex(ind, &start_addr_of_ind, &remaining_shape).IsOk()) ? "[Tensor] incorrect index\n" : ""; + err_msg += !(remaining_shape == tensor->shape()) ? "[Tensor] memory error\n" : ""; + } + if (!err_msg.empty()) { MS_LOG(DEBUG) << "Insert tensor message: " << err_msg; RETURN_STATUS_UNEXPECTED(err_msg); @@ -556,39 +574,6 @@ Status Tensor::InsertTensor(const std::vector &ind, const std::shared_p } } -Status Tensor::Concatenate(const std::vector &index, const std::shared_ptr &tensor) { - std::string err_msg; - err_msg += (index.size() != 1) ? "[Tensor] only supports 1d concatenation \n" : ""; - err_msg += (type() == DataType::DE_STRING) ? "[Tensor] Cannot batch tensors of type string\n" : ""; - err_msg += (!shape().known() || !tensor->shape().known()) ? "[Tensor] unknown shape\n" : ""; - - err_msg += - (index.at(0) + tensor->shape().NumOfElements() > this->shape().NumOfElements()) ? "[Tensor] incorrect index\n" : ""; - err_msg += tensor->type().SizeInBytes() != this->type().SizeInBytes() ? "[Tensor] incorrect datatype\n" : ""; - uchar *start_addr_of_ind = nullptr; - - TensorShape remaining_shape = tensor->shape(); - StartAddrOfIndex(index, &start_addr_of_ind, &remaining_shape); - err_msg += (start_addr_of_ind == nullptr) ? "Failed to create memory for Tensor.\n" : ""; - - if (!err_msg.empty()) { - MS_LOG(DEBUG) << "Insert tensor message: " << err_msg; - - RETURN_STATUS_UNEXPECTED(err_msg); - } else { - int ret_code = - memcpy_s(start_addr_of_ind, tensor->SizeInBytes(), tensor->GetMutableBuffer(), tensor->SizeInBytes()); - - if (ret_code == 0) { - return Status::OK(); - } else { - err_msg += "[Tensor] error in memcpy_s when inserting tensor\n"; - MS_LOG(DEBUG) << "Tensor message: " << err_msg; - RETURN_STATUS_UNEXPECTED(err_msg); - } - } -} - Status Tensor::ExpandDim(const dsize_t &axis) { if (axis > Rank()) { std::string err = "Axis is out of bound"; diff --git a/mindspore/ccsrc/minddata/dataset/core/tensor.h b/mindspore/ccsrc/minddata/dataset/core/tensor.h index 25e896e4a27..b2fe352c1df 100644 --- a/mindspore/ccsrc/minddata/dataset/core/tensor.h +++ b/mindspore/ccsrc/minddata/dataset/core/tensor.h @@ -330,8 +330,10 @@ class Tensor { /// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell. /// \param index /// \param input + /// \param partial_insert: boolean to determine if insertion along the full axis is enforced /// \return Status code - Status InsertTensor(const std::vector &index, const std::shared_ptr &input); + Status InsertTensor(const std::vector &index, const std::shared_ptr &input, + const bool partial_insert = false); /// Find the address of the given index. Used in InsertTensor. /// Example: @@ -393,9 +395,6 @@ class Tensor { static Status GetBufferInfo(Tensor *t, py::buffer_info *out); #endif - /// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor - Status Concatenate(const std::vector &index, const std::shared_ptr &input); - /// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor /// The order elements is as the memory layout (i.e., row-major) [[1,2,3],[4,5,6] --> 1,2,3,4,5,6 /// \tparam T type of values in the Tensor Iterator diff --git a/mindspore/ccsrc/minddata/dataset/include/tensor.h b/mindspore/ccsrc/minddata/dataset/include/tensor.h index 25e896e4a27..b2fe352c1df 100644 --- a/mindspore/ccsrc/minddata/dataset/include/tensor.h +++ b/mindspore/ccsrc/minddata/dataset/include/tensor.h @@ -330,8 +330,10 @@ class Tensor { /// Ex: shape <2,3>, inserting into index {0} will replace the first row. index {1,2} will replace the last cell. /// \param index /// \param input + /// \param partial_insert: boolean to determine if insertion along the full axis is enforced /// \return Status code - Status InsertTensor(const std::vector &index, const std::shared_ptr &input); + Status InsertTensor(const std::vector &index, const std::shared_ptr &input, + const bool partial_insert = false); /// Find the address of the given index. Used in InsertTensor. /// Example: @@ -393,9 +395,6 @@ class Tensor { static Status GetBufferInfo(Tensor *t, py::buffer_info *out); #endif - /// Concatenate based on given tensor, can fill in current tensor with a smaller one, unlike InsertTensor - Status Concatenate(const std::vector &index, const std::shared_ptr &input); - /// TensorIterator is a linear iterator that can be used to iterate over the elements of the Tensor /// The order elements is as the memory layout (i.e., row-major) [[1,2,3],[4,5,6] --> 1,2,3,4,5,6 /// \tparam T type of values in the Tensor Iterator diff --git a/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.cc b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.cc index 267120851b1..5632dddeec2 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.cc +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.cc @@ -580,77 +580,73 @@ Status Mask(const std::shared_ptr &input, std::shared_ptr *outpu Status Concatenate(const TensorRow &input, TensorRow *output, int8_t axis, std::shared_ptr prepend, std::shared_ptr append) { - CHECK_FAIL_RETURN_UNEXPECTED(input[0]->shape().Rank() == 1, "Only 1D tensors supported"); - CHECK_FAIL_RETURN_UNEXPECTED(axis == 0 || axis == -1, "Only concatenation along the last dimension supported"); - axis = Tensor::HandleNeg(axis, input[0]->shape().Rank()); CHECK_FAIL_RETURN_UNEXPECTED(axis == 0, "Only axis=0 is supported"); - std::shared_ptr out; + TensorShape t = TensorShape::CreateScalar(); + + DataType first_dtype = input[0]->type(); + + TensorRow tensor_list; + if (prepend != nullptr) { + CHECK_FAIL_RETURN_UNEXPECTED(first_dtype == prepend->type(), "Tensor types do not match"); CHECK_FAIL_RETURN_UNEXPECTED(prepend->shape().Rank() == 1, "Only 1D tensors supported"); - RETURN_IF_NOT_OK(ConcatenateHelper(prepend, &out, axis, input[0])); - } else { - out = input[0]; + tensor_list.emplace_back(prepend); } - for (dsize_t i = 1; i < input.size(); i++) { - std::shared_ptr out_t; + + for (dsize_t i = 0; i < input.size(); i++) { + CHECK_FAIL_RETURN_UNEXPECTED(first_dtype == input[i]->type(), "Tensor types do not match"); CHECK_FAIL_RETURN_UNEXPECTED(input[i]->shape().Rank() == 1, "Only 1D tensors supported"); - RETURN_IF_NOT_OK(ConcatenateHelper(out, &out_t, axis, input[i])); - out = out_t; + tensor_list.emplace_back(input[i]); } - std::shared_ptr out_t; + if (append != nullptr) { + CHECK_FAIL_RETURN_UNEXPECTED(first_dtype == append->type(), "Tensor types do not match"); CHECK_FAIL_RETURN_UNEXPECTED(append->shape().Rank() == 1, "Only 1D tensors supported"); - RETURN_IF_NOT_OK(ConcatenateHelper(out, &out_t, axis, append)); - } else { - out_t = out; + tensor_list.emplace_back(append); } - output->push_back(out_t); - return Status::OK(); -} - -Status ConcatenateHelper(const std::shared_ptr &input, std::shared_ptr *output, int8_t axis, - std::shared_ptr append) { - CHECK_FAIL_RETURN_UNEXPECTED(input->type() == append->type(), "Tensor types do not match"); - - TensorShape t({}); - - for (dsize_t i = 0; i < input->shape().Rank(); i++) { + // create final shape + for (dsize_t i = 0; i < tensor_list[0]->shape().Rank(); i++) { if (i != axis) { - t = t.AppendDim(input->shape()[i]); + t = t.AppendDim(tensor_list[0]->shape()[i]); } else { - dsize_t new_shape = input->shape()[i] + append->shape()[i]; - + dsize_t new_shape = 0; + for (dsize_t j = 0; j < tensor_list.size(); j++) { + new_shape = tensor_list[j]->shape()[i] + new_shape; + } t = t.AppendDim(new_shape); } } + std::shared_ptr out; - if (input->type().IsNumeric()) { - RETURN_IF_NOT_OK(Tensor::CreateEmpty(t, input->type(), &out)); + if (input[0]->type().IsNumeric()) { + RETURN_IF_NOT_OK(Tensor::CreateEmpty(t, tensor_list[0]->type(), &out)); + std::vector index(axis + 1, 0); - RETURN_IF_NOT_OK(out->Concatenate({0}, input)); - RETURN_IF_NOT_OK(out->Concatenate({input->shape()[0]}, append)); - *output = out; + int n = index.size() - 1; + for (dsize_t i = 0; i < tensor_list.size(); i++) { + RETURN_IF_NOT_OK(out->InsertTensor({index}, tensor_list[i], true)); + index[n] = index[n] + tensor_list[i]->shape()[axis]; + } } else { std::vector strings; - auto itr = input->begin(); - for (; itr != input->end(); itr++) { - strings.emplace_back(*itr); - } - itr = append->begin(); - for (; itr != append->end(); itr++) { - strings.emplace_back(*itr); + for (dsize_t i = 0; i < tensor_list.size(); i++) { + auto itr = tensor_list[i]->begin(); + for (; itr != tensor_list[i]->end(); itr++) { + strings.emplace_back(*itr); + } } RETURN_IF_NOT_OK(Tensor::CreateFromVector(strings, t, &out)); - - *output = out; } + output->push_back(out); + return Status::OK(); } + } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h index 4830995d5bf..5e82b410248 100644 --- a/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h +++ b/mindspore/ccsrc/minddata/dataset/kernels/data/data_utils.h @@ -152,11 +152,6 @@ Status Mask(const std::shared_ptr &input, std::shared_ptr *outpu Status Concatenate(const TensorRow &input, TensorRow *output, int8_t axis, std::shared_ptr prepend, std::shared_ptr append); - -// helper for concat, always append to the input, and pass that to the output -Status ConcatenateHelper(const std::shared_ptr &input, std::shared_ptr *output, int8_t axis, - std::shared_ptr append); - } // namespace dataset } // namespace mindspore diff --git a/tests/ut/cpp/dataset/concatenate_op_test.cc b/tests/ut/cpp/dataset/concatenate_op_test.cc index 0acf9a7eff1..4e1e29b2bec 100644 --- a/tests/ut/cpp/dataset/concatenate_op_test.cc +++ b/tests/ut/cpp/dataset/concatenate_op_test.cc @@ -28,9 +28,8 @@ class MindDataTestConcatenateOp : public UT::Common { }; TEST_F(MindDataTestConcatenateOp, TestOp) { - MS_LOG(INFO) << "Doing MindDataTestConcatenate-TestOp."; + MS_LOG(INFO) << "Doing MindDataTestConcatenate-TestOp-SingleRowinput."; std::vector labels = {1, 1, 2}; - TensorShape shape({3}); std::shared_ptr input; Tensor::CreateFromVector(labels, &input); @@ -57,3 +56,71 @@ TEST_F(MindDataTestConcatenateOp, TestOp) { MS_LOG(DEBUG) << *expected << std::endl; ASSERT_TRUE(*output == *expected); } + +TEST_F(MindDataTestConcatenateOp, TestOp2) { + MS_LOG(INFO) << "Doing MindDataTestConcatenate-TestOp2-MultiInput."; + std::vector labels = {1, 12, 2}; + std::shared_ptr row_1; + Tensor::CreateFromVector(labels, &row_1); + + std::shared_ptr row_2; + Tensor::CreateFromVector(labels, &row_2); + + std::vector append_labels = {4, 4, 4}; + std::shared_ptr append; + Tensor::CreateFromVector(append_labels, &append); + + TensorRow tensor_list; + tensor_list.push_back(row_1); + tensor_list.push_back(row_2); + + std::shared_ptr output; + std::unique_ptr op(new ConcatenateOp(0, nullptr, append)); + + TensorRow out_row; + Status s = op->Compute(tensor_list, &out_row); + std::vector out = {1, 12, 2, 1, 12, 2, 4, 4, 4}; + + std::shared_ptr expected; + Tensor::CreateFromVector(out, &expected); + + output = out_row[0]; + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + ASSERT_TRUE(*output == *expected); +} + +TEST_F(MindDataTestConcatenateOp, TestOp3) { + MS_LOG(INFO) << "Doing MindDataTestConcatenate-TestOp3-Strings."; + std::vector labels = {"hello", "bye"}; + std::shared_ptr row_1; + Tensor::CreateFromVector(labels, &row_1); + + std::vector append_labels = {"1", "2", "3"}; + std::shared_ptr append; + Tensor::CreateFromVector(append_labels, &append); + + TensorRow tensor_list; + tensor_list.push_back(row_1); + + std::shared_ptr output; + std::unique_ptr op(new ConcatenateOp(0, nullptr, append)); + + TensorRow out_row; + Status s = op->Compute(tensor_list, &out_row); + std::vector out = {"hello", "bye", "1", "2", "3"}; + + std::shared_ptr expected; + Tensor::CreateFromVector(out, &expected); + + output = out_row[0]; + EXPECT_TRUE(s.IsOk()); + ASSERT_TRUE(output->shape() == expected->shape()); + ASSERT_TRUE(output->type() == expected->type()); + MS_LOG(DEBUG) << *output << std::endl; + MS_LOG(DEBUG) << *expected << std::endl; + ASSERT_TRUE(*output == *expected); +} diff --git a/tests/ut/cpp/dataset/tensor_test.cc b/tests/ut/cpp/dataset/tensor_test.cc index 47279874252..758b194835c 100644 --- a/tests/ut/cpp/dataset/tensor_test.cc +++ b/tests/ut/cpp/dataset/tensor_test.cc @@ -432,7 +432,7 @@ TEST_F(MindDataTestTensorDE, TensorSlice) { ASSERT_EQ(*t2, *t); } -TEST_F(MindDataTestTensorDE, TensorConcatenate) { +TEST_F(MindDataTestTensorDE, TensorPartialInsert) { std::vector values1 = {1, 2, 3, 0, 0, 0}; std::vector values2 = {4, 5, 6}; std::vector expected = {1, 2, 3, 4, 5, 6}; @@ -445,7 +445,7 @@ TEST_F(MindDataTestTensorDE, TensorConcatenate) { std::shared_ptr out; Tensor::CreateFromVector(expected, &out); - Status s = t1->Concatenate({3}, t2); + Status s = t1->InsertTensor({3}, t2, true); EXPECT_TRUE(s.IsOk()); auto i = out->begin(); @@ -455,7 +455,7 @@ TEST_F(MindDataTestTensorDE, TensorConcatenate) { } // should fail if the concatenated vector is too large - s = t1->Concatenate({5}, t2); + s = t1->InsertTensor({5}, t2, true); EXPECT_FALSE(s.IsOk()); } diff --git a/tests/ut/python/dataset/test_concatenate_op.py b/tests/ut/python/dataset/test_concatenate_op.py index f7a432e4716..d60cff06c5a 100644 --- a/tests/ut/python/dataset/test_concatenate_op.py +++ b/tests/ut/python/dataset/test_concatenate_op.py @@ -130,7 +130,7 @@ def test_concatenate_op_incorrect_dim(): def gen(): yield (np.array([["ss", "ad"], ["ss", "ad"]], dtype='S'),) - prepend_tensor = np.array([3, 5], dtype=np.float) + prepend_tensor = np.array(["ss", "ss"], dtype='S') concatenate_op = data_trans.Concatenate(0, prepend_tensor) data = ds.GeneratorDataset(gen, column_names=["col"])