forked from mindspore-Ecosystem/mindspore
!9609 add ToNumber and TruncateSequencePair C++ API
From: @tiancixiao Reviewed-by: Signed-off-by:
This commit is contained in:
commit
81f1283dd2
|
@ -32,6 +32,8 @@
|
|||
#endif
|
||||
#include "minddata/dataset/text/kernels/sentence_piece_tokenizer_op.h"
|
||||
#include "minddata/dataset/text/kernels/sliding_window_op.h"
|
||||
#include "minddata/dataset/text/kernels/to_number_op.h"
|
||||
#include "minddata/dataset/text/kernels/truncate_sequence_pair_op.h"
|
||||
#include "minddata/dataset/text/kernels/unicode_char_tokenizer_op.h"
|
||||
#ifndef _WIN32
|
||||
#include "minddata/dataset/text/kernels/unicode_script_tokenizer_op.h"
|
||||
|
@ -140,6 +142,18 @@ std::shared_ptr<SlidingWindowOperation> SlidingWindow(const int32_t width, const
|
|||
return op->ValidateParams() ? op : nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<ToNumberOperation> ToNumber(const DataType data_type) {
|
||||
auto op = std::make_shared<ToNumberOperation>(data_type);
|
||||
|
||||
return op->ValidateParams() ? op : nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<TruncateSequencePairOperation> TruncateSequencePair(int32_t max_length) {
|
||||
auto op = std::make_shared<TruncateSequencePairOperation>(max_length);
|
||||
|
||||
return op->ValidateParams() ? op : nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<UnicodeCharTokenizerOperation> UnicodeCharTokenizer(bool with_offsets) {
|
||||
auto op = std::make_shared<UnicodeCharTokenizerOperation>(with_offsets);
|
||||
|
||||
|
@ -461,6 +475,43 @@ std::shared_ptr<TensorOp> SlidingWindowOperation::Build() {
|
|||
return tensor_op;
|
||||
}
|
||||
|
||||
// ToNumberOperation
|
||||
// Stores the requested target data type; no checking happens here, ValidateParams() inspects it later.
ToNumberOperation::ToNumberOperation(DataType data_type) : data_type_(data_type) {}
|
||||
|
||||
// Verifies that the stored data type is numeric.
// On a non-numeric type the problem is logged and a syntax-error Status is returned;
// otherwise Status::OK() is returned.
Status ToNumberOperation::ValidateParams() {
  const bool is_numeric = data_type_.IsNumeric();
  if (!is_numeric) {
    std::string err_msg = "ToNumber : The parameter data_type must be a numeric type: ";
    err_msg += std::to_string(data_type_.value());
    MS_LOG(ERROR) << err_msg;
    RETURN_STATUS_SYNTAX_ERROR(err_msg);
  }

  return Status::OK();
}
|
||||
|
||||
std::shared_ptr<TensorOp> ToNumberOperation::Build() {
|
||||
std::shared_ptr<ToNumberOp> tensor_op = std::make_shared<ToNumberOp>(data_type_);
|
||||
return tensor_op;
|
||||
}
|
||||
|
||||
// TruncateSequencePairOperation
// Stores max_length as given; the non-negativity check is done by ValidateParams().
TruncateSequencePairOperation::TruncateSequencePairOperation(int32_t max_length) : max_length_(max_length) {}
|
||||
|
||||
// Verifies that max_length is not negative.
// A negative value is logged and reported as a syntax-error Status;
// otherwise Status::OK() is returned.
Status TruncateSequencePairOperation::ValidateParams() {
  const bool is_negative = max_length_ < 0;
  if (is_negative) {
    std::string err_msg = "TruncateSequencePair : The parameter max_length must be greater than or equal to 0: ";
    err_msg += std::to_string(max_length_);
    MS_LOG(ERROR) << err_msg;
    RETURN_STATUS_SYNTAX_ERROR(err_msg);
  }

  return Status::OK();
}
|
||||
|
||||
std::shared_ptr<TensorOp> TruncateSequencePairOperation::Build() {
|
||||
std::shared_ptr<TruncateSequencePairOp> tensor_op = std::make_shared<TruncateSequencePairOp>(max_length_);
|
||||
return tensor_op;
|
||||
}
|
||||
|
||||
// UnicodeCharTokenizerOperation
|
||||
// Stores the with_offsets flag passed by the caller.
UnicodeCharTokenizerOperation::UnicodeCharTokenizerOperation(bool with_offsets) : with_offsets_(with_offsets) {}
|
||||
|
||||
|
|
|
@ -49,6 +49,8 @@ constexpr char kRegexReplaceOperation[] = "RegexReplace";
|
|||
constexpr char kRegexTokenizerOperation[] = "RegexTokenizer";
|
||||
constexpr char kSentencepieceTokenizerOperation[] = "SentencepieceTokenizer";
|
||||
constexpr char kSlidingWindowOperation[] = "SlidingWindow";
|
||||
constexpr char kToNumberOperation[] = "ToNumber";
|
||||
constexpr char kTruncateSequencePairOperation[] = "TruncateSequencePair";
|
||||
constexpr char kUnicodeCharTokenizerOperation[] = "UnicodeCharTokenizer";
|
||||
constexpr char kUnicodeScriptTokenizerOperation[] = "UnicodeScriptTokenizer";
|
||||
constexpr char kWhitespaceTokenizerOperation[] = "WhitespaceTokenizer";
|
||||
|
@ -69,6 +71,8 @@ class RegexTokenizerOperation;
|
|||
#endif
|
||||
class SentencePieceTokenizerOperation;
|
||||
class SlidingWindowOperation;
|
||||
class ToNumberOperation;
|
||||
class TruncateSequencePairOperation;
|
||||
class UnicodeCharTokenizerOperation;
|
||||
#ifndef _WIN32
|
||||
class UnicodeScriptTokenizerOperation;
|
||||
|
@ -216,6 +220,20 @@ std::shared_ptr<SentencePieceTokenizerOperation> SentencePieceTokenizer(
|
|||
/// \return Shared pointer to the current TensorOperation.
|
||||
std::shared_ptr<SlidingWindowOperation> SlidingWindow(const int32_t width, const int32_t axis = 0);
|
||||
|
||||
/// \brief Tensor operation to convert every element of a string tensor to a number.
|
||||
/// Strings are cast according to the rules specified in the following links:
|
||||
/// https://en.cppreference.com/w/cpp/string/basic_string/stof,
|
||||
/// https://en.cppreference.com/w/cpp/string/basic_string/stoul,
|
||||
/// except that any strings which represent negative numbers cannot be cast to an unsigned integer type.
|
||||
/// \param[in] data_type DataType of the tensor to be cast to. Must be a numeric type.
|
||||
/// \return Shared pointer to the current TensorOperation.
|
||||
std::shared_ptr<ToNumberOperation> ToNumber(const DataType data_type);
|
||||
|
||||
/// \brief Truncate a pair of rank-1 tensors such that the total length does not exceed max_length.
|
||||
/// \param[in] max_length Maximum length required.
|
||||
/// \return Shared pointer to the current TensorOperation.
|
||||
std::shared_ptr<TruncateSequencePairOperation> TruncateSequencePair(int32_t max_length);
|
||||
|
||||
/// \brief Tokenize a scalar tensor of UTF-8 string to Unicode characters.
|
||||
/// \param[in] with_offsets If or not output offsets of tokens (default=false).
|
||||
/// \return Shared pointer to the current TensorOperation.
|
||||
|
@ -452,6 +470,38 @@ class SlidingWindowOperation : public TensorOperation {
|
|||
int32_t axis_;
|
||||
};
|
||||
|
||||
class ToNumberOperation : public TensorOperation {
|
||||
public:
|
||||
explicit ToNumberOperation(DataType data_type);
|
||||
|
||||
~ToNumberOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
Status ValidateParams() override;
|
||||
|
||||
std::string Name() const override { return kToNumberOperation; }
|
||||
|
||||
private:
|
||||
DataType data_type_;
|
||||
};
|
||||
|
||||
class TruncateSequencePairOperation : public TensorOperation {
|
||||
public:
|
||||
explicit TruncateSequencePairOperation(int32_t max_length);
|
||||
|
||||
~TruncateSequencePairOperation() = default;
|
||||
|
||||
std::shared_ptr<TensorOp> Build() override;
|
||||
|
||||
Status ValidateParams() override;
|
||||
|
||||
std::string Name() const override { return kTruncateSequencePairOperation; }
|
||||
|
||||
private:
|
||||
int32_t max_length_;
|
||||
};
|
||||
|
||||
class UnicodeCharTokenizerOperation : public TensorOperation {
|
||||
public:
|
||||
explicit UnicodeCharTokenizerOperation(bool with_offsets);
|
||||
|
|
|
@ -730,7 +730,7 @@ class TruncateSequencePair(cde.TruncateSequencePairOp):
|
|||
"""
|
||||
Truncate a pair of rank-1 tensors such that the total length does not exceed max_length.
|
||||
|
||||
This operation takes two input tensors and returns two output Tenors.
|
||||
This operation takes two input tensors and returns two output Tensors.
|
||||
|
||||
Args:
|
||||
max_length (int): Maximum length required.
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
#include <string>
|
||||
|
||||
#include "common/common.h"
|
||||
#include "minddata/dataset/include/config.h"
|
||||
#include "minddata/dataset/include/datasets.h"
|
||||
#include "minddata/dataset/include/status.h"
|
||||
#include "minddata/dataset/include/transforms.h"
|
||||
|
@ -995,6 +996,428 @@ TEST_F(MindDataTestPipeline, TestSlidingWindowFail) {
|
|||
EXPECT_EQ(sliding_window1, nullptr);
|
||||
}
|
||||
|
||||
// Pipeline test: ToNumber with an int64 target applied to the first eight lines of to_number.txt.
TEST_F(MindDataTestPipeline, TestToNumberSuccess1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestToNumberSuccess1.";

  // Read the text file without shuffling and keep only its leading eight rows.
  std::string data_file = datasets_root_path_ + "/testTokenizerData/to_number.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);
  ds = ds->Take(8);
  EXPECT_NE(ds, nullptr);

  // Run the "text" column through ToNumber.
  std::shared_ptr<TensorOperation> to_number = text::ToNumber(DataType("int64"));
  EXPECT_NE(to_number, nullptr);
  ds = ds->Map({to_number}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Creating the iterator triggers the creation of the Execution Tree and launches it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Expected scalars, in the order the lines appear in the data file.
  std::vector<int64_t> expected = {-121, 14, -2219, 7623, -8162536, 162371864, -1726483716, 98921728421};

  // Fetch rows one by one until GetNextRow leaves the row empty.
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  uint64_t row_count = 0;
  for (iter->GetNextRow(&row); !row.empty(); iter->GetNextRow(&row)) {
    std::shared_ptr<Tensor> actual = row["text"];
    std::shared_ptr<Tensor> expected_tensor;
    Tensor::CreateScalar(expected[row_count], &expected_tensor);
    EXPECT_EQ(*actual, *expected_tensor);
    ++row_count;
  }

  EXPECT_EQ(row_count, 8);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Pipeline test: ToNumber with a float64 target applied to lines 9-14 of to_number.txt.
TEST_F(MindDataTestPipeline, TestToNumberSuccess2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestToNumberSuccess2.";

  // Read the text file without shuffling, drop the first eight rows and keep the next six.
  std::string data_file = datasets_root_path_ + "/testTokenizerData/to_number.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);
  ds = ds->Skip(8);
  EXPECT_NE(ds, nullptr);
  ds = ds->Take(6);
  EXPECT_NE(ds, nullptr);

  // Run the "text" column through ToNumber.
  std::shared_ptr<TensorOperation> to_number = text::ToNumber(DataType("float64"));
  EXPECT_NE(to_number, nullptr);
  ds = ds->Map({to_number}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Creating the iterator triggers the creation of the Execution Tree and launches it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Expected scalars, in the order the lines appear in the data file.
  std::vector<double_t> expected = {-1.1, 1.4, -2219.321, 7623.453, -816256.234282, 162371864.243243};

  // Fetch rows one by one until GetNextRow leaves the row empty.
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  uint64_t row_count = 0;
  for (iter->GetNextRow(&row); !row.empty(); iter->GetNextRow(&row)) {
    std::shared_ptr<Tensor> actual = row["text"];
    std::shared_ptr<Tensor> expected_tensor;
    Tensor::CreateScalar(expected[row_count], &expected_tensor);
    EXPECT_EQ(*actual, *expected_tensor);
    ++row_count;
  }

  EXPECT_EQ(row_count, 6);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Pipeline test: ToNumber with an int8 target on integers that overflow int8; no row is expected.
TEST_F(MindDataTestPipeline, TestToNumberFail1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestToNumberFail1.";

  // Read the text file without shuffling, drop the first two rows and keep the next six.
  std::string data_file = datasets_root_path_ + "/testTokenizerData/to_number.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);
  ds = ds->Skip(2);
  EXPECT_NE(ds, nullptr);
  ds = ds->Take(6);
  EXPECT_NE(ds, nullptr);

  // Run the "text" column through ToNumber.
  std::shared_ptr<TensorOperation> to_number = text::ToNumber(DataType("int8"));
  EXPECT_NE(to_number, nullptr);
  ds = ds->Map({to_number}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Creating the iterator triggers the creation of the Execution Tree and launches it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Expect error: input out of bounds of int8, so the very first fetch should leave the row empty.
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  uint64_t row_count = 0;
  for (iter->GetNextRow(&row); !row.empty(); iter->GetNextRow(&row)) {
    ++row_count;
  }

  // Expect failure: GetNextRow fail and return nothing
  EXPECT_EQ(row_count, 0);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Pipeline test: ToNumber with a float16 target on floats that overflow float16; no row is expected.
TEST_F(MindDataTestPipeline, TestToNumberFail2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestToNumberFail2.";

  // Read the text file without shuffling, drop the first twelve rows and keep the next two.
  std::string data_file = datasets_root_path_ + "/testTokenizerData/to_number.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);
  ds = ds->Skip(12);
  EXPECT_NE(ds, nullptr);
  ds = ds->Take(2);
  EXPECT_NE(ds, nullptr);

  // Run the "text" column through ToNumber.
  std::shared_ptr<TensorOperation> to_number = text::ToNumber(DataType("float16"));
  EXPECT_NE(to_number, nullptr);
  ds = ds->Map({to_number}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Creating the iterator triggers the creation of the Execution Tree and launches it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Expect error: input out of bounds of float16, so the very first fetch should leave the row empty.
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  uint64_t row_count = 0;
  for (iter->GetNextRow(&row); !row.empty(); iter->GetNextRow(&row)) {
    ++row_count;
  }

  // Expect failure: GetNextRow fail and return nothing
  EXPECT_EQ(row_count, 0);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Pipeline test: ToNumber with an int64 target on a non-numerical line; no row is expected.
TEST_F(MindDataTestPipeline, TestToNumberFail3) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestToNumberFail3.";

  // Read the text file without shuffling and drop the first fourteen rows.
  std::string data_file = datasets_root_path_ + "/testTokenizerData/to_number.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);
  ds = ds->Skip(14);
  EXPECT_NE(ds, nullptr);

  // Run the "text" column through ToNumber.
  std::shared_ptr<TensorOperation> to_number = text::ToNumber(DataType("int64"));
  EXPECT_NE(to_number, nullptr);
  ds = ds->Map({to_number}, {"text"});
  EXPECT_NE(ds, nullptr);

  // Creating the iterator triggers the creation of the Execution Tree and launches it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Expect error: invalid input which is non numerical, so the very first fetch should leave the row empty.
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  uint64_t row_count = 0;
  for (iter->GetNextRow(&row); !row.empty(); iter->GetNextRow(&row)) {
    ++row_count;
  }

  // Expect failure: GetNextRow fail and return nothing
  EXPECT_EQ(row_count, 0);

  // Manually terminate the pipeline
  iter->Stop();
}
|
||||
|
||||
// Parameter test: ToNumber must reject a non-numerical DataType by returning nullptr.
TEST_F(MindDataTestPipeline, TestToNumberFail4) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestToNumberFail4.";

  // Create a TextFile dataset (read in order, no shuffling).
  std::string data_file = datasets_root_path_ + "/testTokenizerData/to_number.txt";
  std::shared_ptr<Dataset> ds = TextFile({data_file}, 0, ShuffleMode::kFalse);
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid parameter with non numerical DataType
  std::shared_ptr<TensorOperation> to_number = text::ToNumber(DataType("string"));
  EXPECT_EQ(to_number, nullptr);
}
|
||||
|
||||
// Pipeline test: TruncateSequencePair(4) on a pair of columns with lengths 5 and 3.
TEST_F(MindDataTestPipeline, TestTruncateSequencePairSuccess1) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTruncateSequencePairSuccess1.";

  // Pin the seed and the worker count for RandomDataset, remembering the previous values.
  auto original_seed = config::get_seed();
  bool status_set_seed = config::set_seed(0);
  EXPECT_EQ(status_set_seed, true);
  auto original_worker = config::get_num_parallel_workers();
  bool status_set_worker = config::set_num_parallel_workers(1);
  EXPECT_EQ(status_set_worker, true);

  // Create a RandomDataset which has column names "col1" and "col2"
  std::shared_ptr<SchemaObj> schema = Schema();
  schema->add_column("col1", mindspore::TypeId::kNumberTypeInt16, {5});
  schema->add_column("col2", mindspore::TypeId::kNumberTypeInt32, {3});
  std::shared_ptr<Dataset> ds = RandomData(3, schema);
  EXPECT_NE(ds, nullptr);

  // Apply the truncate_sequence_pair operation to both columns.
  std::shared_ptr<TensorOperation> truncate_sequence_pair = text::TruncateSequencePair(4);
  EXPECT_NE(truncate_sequence_pair, nullptr);
  ds = ds->Map({truncate_sequence_pair}, {"col1", "col2"});
  EXPECT_NE(ds, nullptr);

  // Creating the iterator triggers the creation of the Execution Tree and launches it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Expected per-row contents of each column after truncation.
  std::vector<std::vector<int16_t>> expected1 = {{-29556, -29556}, {-18505, -18505}, {-25958, -25958}};
  std::vector<std::vector<int32_t>> expected2 = {
    {-1751672937, -1751672937}, {-656877352, -656877352}, {-606348325, -606348325}};

  // Fetch rows one by one until GetNextRow leaves the row empty.
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  uint64_t row_count = 0;
  for (iter->GetNextRow(&row); !row.empty(); iter->GetNextRow(&row)) {
    std::shared_ptr<Tensor> actual1 = row["col1"];
    std::shared_ptr<Tensor> actual2 = row["col2"];
    std::shared_ptr<Tensor> expected_tensor1;
    std::shared_ptr<Tensor> expected_tensor2;
    Tensor::CreateFromVector(expected1[row_count], &expected_tensor1);
    Tensor::CreateFromVector(expected2[row_count], &expected_tensor2);
    EXPECT_EQ(*actual1, *expected_tensor1);
    EXPECT_EQ(*actual2, *expected_tensor2);
    ++row_count;
  }

  EXPECT_EQ(row_count, 3);

  // Manually terminate the pipeline
  iter->Stop();

  // Restore original seed and num_parallel_workers
  status_set_seed = config::set_seed(original_seed);
  EXPECT_EQ(status_set_seed, true);
  status_set_worker = config::set_num_parallel_workers(original_worker);
  EXPECT_EQ(status_set_worker, true);
}
|
||||
|
||||
// Pipeline test: TruncateSequencePair with an odd max_length (5) on two columns of length 4.
TEST_F(MindDataTestPipeline, TestTruncateSequencePairSuccess2) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTruncateSequencePairSuccess2.";

  // Pin the seed and the worker count for RandomDataset, remembering the previous values.
  auto original_seed = config::get_seed();
  bool status_set_seed = config::set_seed(1);
  EXPECT_EQ(status_set_seed, true);
  auto original_worker = config::get_num_parallel_workers();
  bool status_set_worker = config::set_num_parallel_workers(1);
  EXPECT_EQ(status_set_worker, true);

  // Create a RandomDataset which has column names "col1" and "col2"
  std::shared_ptr<SchemaObj> schema = Schema();
  schema->add_column("col1", mindspore::TypeId::kNumberTypeInt32, {4});
  schema->add_column("col2", mindspore::TypeId::kNumberTypeInt64, {4});
  std::shared_ptr<Dataset> ds = RandomData(4, schema);
  EXPECT_NE(ds, nullptr);

  // Apply the truncate_sequence_pair operation to both columns.
  std::shared_ptr<TensorOperation> truncate_sequence_pair = text::TruncateSequencePair(5);
  EXPECT_NE(truncate_sequence_pair, nullptr);
  ds = ds->Map({truncate_sequence_pair}, {"col1", "col2"});
  EXPECT_NE(ds, nullptr);

  // Creating the iterator triggers the creation of the Execution Tree and launches it.
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  EXPECT_NE(iter, nullptr);

  // Expected per-row contents of each column after truncation.
  std::vector<std::vector<int32_t>> expected1 = {{1785358954, 1785358954, 1785358954},
                                                 {-1195853640, -1195853640, -1195853640},
                                                 {0, 0, 0},
                                                 {1296911693, 1296911693, 1296911693}};
  std::vector<std::vector<int64_t>> expected2 = {
    {-1, -1}, {-1229782938247303442, -1229782938247303442}, {2314885530818453536, 2314885530818453536}, {-1, -1}};

  // Fetch rows one by one until GetNextRow leaves the row empty.
  std::unordered_map<std::string, std::shared_ptr<Tensor>> row;
  uint64_t row_count = 0;
  for (iter->GetNextRow(&row); !row.empty(); iter->GetNextRow(&row)) {
    std::shared_ptr<Tensor> actual1 = row["col1"];
    std::shared_ptr<Tensor> actual2 = row["col2"];
    std::shared_ptr<Tensor> expected_tensor1;
    std::shared_ptr<Tensor> expected_tensor2;
    Tensor::CreateFromVector(expected1[row_count], &expected_tensor1);
    Tensor::CreateFromVector(expected2[row_count], &expected_tensor2);
    EXPECT_EQ(*actual1, *expected_tensor1);
    EXPECT_EQ(*actual2, *expected_tensor2);
    ++row_count;
  }

  EXPECT_EQ(row_count, 4);

  // Manually terminate the pipeline
  iter->Stop();

  // Restore original seed and num_parallel_workers
  status_set_seed = config::set_seed(original_seed);
  EXPECT_EQ(status_set_seed, true);
  status_set_worker = config::set_num_parallel_workers(original_worker);
  EXPECT_EQ(status_set_worker, true);
}
|
||||
|
||||
// Parameter test: TruncateSequencePair must reject a negative max_length by returning nullptr.
TEST_F(MindDataTestPipeline, TestTruncateSequencePairFail) {
  MS_LOG(INFO) << "Doing MindDataTestPipeline-TestTruncateSequencePairFail.";

  // Create a RandomDataset which has column names "col1" and "col2"
  std::shared_ptr<SchemaObj> schema = Schema();
  schema->add_column("col1", mindspore::TypeId::kNumberTypeInt8, {3});
  schema->add_column("col2", mindspore::TypeId::kNumberTypeInt8, {3});
  std::shared_ptr<Dataset> ds = RandomData(3, schema);
  EXPECT_NE(ds, nullptr);

  // Expect failure: invalid parameter with negative max_length
  std::shared_ptr<TensorOperation> truncate_sequence_pair = text::TruncateSequencePair(-1);
  EXPECT_EQ(truncate_sequence_pair, nullptr);
}
|
||||
|
||||
TEST_F(MindDataTestPipeline, TestNgramSuccess) {
|
||||
// Testing the parameter of Ngram interface.
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestNgramSuccess.";
|
||||
|
|
|
@ -514,7 +514,7 @@ TEST_F(MindDataTestPipeline, TestCutOutFail1) {
|
|||
EXPECT_EQ(cutout_op, nullptr);
|
||||
}
|
||||
|
||||
TEST_F(MindDataTestPipeline, DISABLED_TestCutOutFail2) {
|
||||
TEST_F(MindDataTestPipeline, TestCutOutFail2) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestCutOutFail2 with invalid params, boundary cases.";
|
||||
|
||||
// Create object for the tensor op
|
||||
|
@ -1407,12 +1407,12 @@ TEST_F(MindDataTestPipeline, TestRandomCropWithBboxFail) {
|
|||
EXPECT_EQ(random_crop7, nullptr);
|
||||
}
|
||||
|
||||
TEST_F(MindDataTestPipeline, DISABLED_TestRandomHorizontalFlipFail) {
|
||||
TEST_F(MindDataTestPipeline, TestRandomHorizontalFlipFail) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomHorizontalFlipFail with invalid parameters.";
|
||||
|
||||
// Create object for the tensor op
|
||||
// Invalid zero input
|
||||
std::shared_ptr<TensorOperation> random_horizontal_flip_op = vision::RandomHorizontalFlip(0);
|
||||
// Invalid negative input
|
||||
std::shared_ptr<TensorOperation> random_horizontal_flip_op = vision::RandomHorizontalFlip(-0.5);
|
||||
EXPECT_EQ(random_horizontal_flip_op, nullptr);
|
||||
// Invalid >1 input
|
||||
random_horizontal_flip_op = vision::RandomHorizontalFlip(2);
|
||||
|
@ -2423,12 +2423,12 @@ TEST_F(MindDataTestPipeline, TestRandomSolarizeFail) {
|
|||
EXPECT_EQ(random_solarize, nullptr);
|
||||
}
|
||||
|
||||
TEST_F(MindDataTestPipeline, DISABLED_TestRandomVerticalFlipFail) {
|
||||
TEST_F(MindDataTestPipeline, TestRandomVerticalFlipFail) {
|
||||
MS_LOG(INFO) << "Doing MindDataTestPipeline-TestRandomVerticalFlipFail with invalid parameters.";
|
||||
|
||||
// Create object for the tensor op
|
||||
// Invalid zero input
|
||||
std::shared_ptr<TensorOperation> random_vertical_flip_op = vision::RandomVerticalFlip(0);
|
||||
// Invalid negative input
|
||||
std::shared_ptr<TensorOperation> random_vertical_flip_op = vision::RandomVerticalFlip(-0.5);
|
||||
EXPECT_EQ(random_vertical_flip_op, nullptr);
|
||||
// Invalid >1 input
|
||||
random_vertical_flip_op = vision::RandomVerticalFlip(1.1);
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
-121
|
||||
14
|
||||
-2219
|
||||
7623
|
||||
-8162536
|
||||
162371864
|
||||
-1726483716
|
||||
98921728421
|
||||
-1.1
|
||||
1.4
|
||||
-2219.321
|
||||
7623.453
|
||||
-816256.234282
|
||||
162371864.243243
|
||||
This is a string.
|
Loading…
Reference in New Issue