forked from mindspore-Ecosystem/mindspore
Disable getter pass
This commit is contained in:
parent 5eb161d188
commit db2a8b5e1d

@@ -323,6 +323,7 @@ Status DatasetOp::GetNumClasses(int64_t *num_classes) {
     return child_[child_.size() - 1]->GetNumClasses(num_classes);
   } else {
     // when num classes isn't found, the default behavior is to return -1
     MS_LOG(WARNING) << "Num classes not defined for : " << Name();
     *num_classes = -1;
     return Status::OK();
   }

@@ -54,15 +54,7 @@ Status GetterPass::GetterNodes::RunOnNode(std::shared_ptr<FilterOp> node, bool *
 Status GetterPass::RunOnTree(ExecutionTree *tree, bool *modified) {
   RETURN_IF_NOT_OK(pass_.Run(tree, modified));
 
-  // nested private class variables can be directly accessed by its outer class
-  for (auto node : pass_.nodes_to_remove_) {
-    DatasetOp *parent;
-    node->Parent(&parent, 0);
-    // only remove node whose is a single child of its parent
-    if (parent != nullptr && parent->Children().size() == 1) {
-      RETURN_IF_NOT_OK(node->Remove());
-    }
-  }
+  // currently the getter pass only disables call_back from the execution tree
 
   // clear the callback for selected ops (map when its GetOutputType/Shape)
   for (auto node : pass_.nodes_to_clear_callback_) node->ClearCallbacks();

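The hunk above is the substance of the commit: GetterPass::RunOnTree no longer detaches single-child nodes from the execution tree; after pass_.Run() has collected the relevant ops, it only strips their callbacks. A minimal sketch of what the function body reduces to after this change is shown below; the trailing return is assumed (the hunk ends before the function does), and all types involved are the existing MindSpore ones, not redefined here.

// Sketch only, not the literal file contents after the commit.
Status GetterPass::RunOnTree(ExecutionTree *tree, bool *modified) {
  RETURN_IF_NOT_OK(pass_.Run(tree, modified));

  // currently the getter pass only disables call_back from the execution tree

  // clear the callback for selected ops (map when its GetOutputType/Shape)
  for (auto node : pass_.nodes_to_clear_callback_) node->ClearCallbacks();

  return Status::OK();  // assumed tail; not shown in the hunk
}
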
@@ -131,7 +131,7 @@ Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::string_view &text
 Status BasicTokenizerOp::CaseFoldWithoutUnusedWords(const std::shared_ptr<Tensor> &input,
                                                     std::shared_ptr<Tensor> *output) {
   IO_CHECK(input, output);
-  CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string");
+  CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string.");
   std::vector<std::string> strs(input->Size());
   int i = 0;
   for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) {

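Most of the remaining hunks make the same cosmetic change: the message passed to CHECK_FAIL_RETURN_UNEXPECTED or RETURN_STATUS_UNEXPECTED gains a trailing period (and is sometimes capitalized), while the guarded condition and control flow are untouched. The standalone sketch below shows the call shape these macros have at their use sites; it defines its own toy Status type and macro purely for illustration, since the real MindSpore definitions are not part of this diff.

// Toy illustration, not the MindSpore implementation: a minimal Status type and a
// check-and-return macro with the same call shape as CHECK_FAIL_RETURN_UNEXPECTED.
#include <iostream>
#include <string>
#include <utility>

struct Status {
  bool ok;
  std::string msg;
  static Status OK() { return {true, ""}; }
  static Status Unexpected(std::string m) { return {false, std::move(m)}; }
};

#define CHECK_FAIL_RETURN_UNEXPECTED(cond, msg)   \
  do {                                            \
    if (!(cond)) return Status::Unexpected(msg);  \
  } while (false)

// Mirrors the validation in the tokenizer ops above: condition first,
// human-readable message (now consistently ending in a period) second.
Status ValidateRank(int rank) {
  CHECK_FAIL_RETURN_UNEXPECTED(rank == 1, "Not a 1-D str Tensor.");
  return Status::OK();
}

int main() {
  Status s = ValidateRank(2);
  std::cout << (s.ok ? "ok" : s.msg) << std::endl;  // prints the standardized message
  return 0;
}
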
@@ -29,7 +29,7 @@ namespace dataset {
 
 Status CaseFoldOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   IO_CHECK(input, output);
-  CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string");
+  CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string.");
   icu::ErrorCode error;
   const icu::Normalizer2 *nfkc_case_fold = icu::Normalizer2::getNFKCCasefoldInstance(error);
   CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCCasefoldInstance failed.");

@@ -33,11 +33,11 @@ JiebaTokenizerOp::JiebaTokenizerOp(const std::string &hmm_path, const std::strin
 
 Status JiebaTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
   IO_CHECK_VECTOR(input, output);
-  CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor");
+  CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor.");
   RETURN_UNEXPECTED_IF_NULL(jieba_parser_);
 
   if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) {
-    RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor");
+    RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor.");
   }
 
   std::string_view sentence_v;

@@ -35,7 +35,7 @@ NgramOp::NgramOp(const std::vector<int32_t> &ngrams, int32_t l_len, int32_t r_le
 
 Status NgramOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   IO_CHECK(input, output);
-  CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING && input->Rank() == 1, "Not a 1-D str Tensor");
+  CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING && input->Rank() == 1, "Not a 1-D str Tensor.");
   std::vector<int32_t> offsets;    // offsets for each str
   std::vector<std::string> res;    // holds the result of ngrams
   std::string str_buffer;          // concat all pad tokens with string interleaved with separators

@@ -60,7 +60,7 @@ Status NgramOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Te
       if (end_ind - start_ind <= n) {
         res.emplace_back(std::string());  // push back empty string
       } else {
-        CHECK_FAIL_RETURN_UNEXPECTED(end_ind - n >= 0, "Incorrect loop condition");
+        CHECK_FAIL_RETURN_UNEXPECTED(end_ind - n >= 0, "Incorrect loop condition.");
 
         for (int i = start_ind; i < end_ind - n; i++) {
           res.emplace_back(str_buffer.substr(offsets[i], offsets[i + n] - offsets[i] - separator_.size()));

@@ -29,7 +29,7 @@ namespace dataset {
 const NormalizeForm NormalizeUTF8Op::kDefNormalizeForm = NormalizeForm::kNfkc;
 Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   IO_CHECK(input, output);
-  CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string");
+  CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string.");
 
   icu::ErrorCode error;
   const icu::Normalizer2 *normalize = nullptr;

@@ -40,26 +40,26 @@ Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::share
     }
     case NormalizeForm::kNfc: {
       normalize = icu::Normalizer2::getNFCInstance(error);
-      CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFCInstance failed");
+      CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFCInstance failed.");
       break;
     }
     case NormalizeForm::kNfkc: {
       normalize = icu::Normalizer2::getNFKCInstance(error);
-      CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCInstance failed");
+      CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKCInstance failed.");
       break;
     }
    case NormalizeForm::kNfd: {
       normalize = icu::Normalizer2::getNFDInstance(error);
-      CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFDInstance failed");
+      CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFDInstance failed.");
       break;
     }
     case NormalizeForm::kNfkd: {
       normalize = icu::Normalizer2::getNFKDInstance(error);
-      CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKDInstance failed");
+      CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "getNFKDInstance failed.");
       break;
     }
     default: {
-      RETURN_STATUS_UNEXPECTED("unexpected normalize form");
+      RETURN_STATUS_UNEXPECTED("Unexpected normalize form.");
       break;
     }
   }

@@ -68,7 +68,7 @@ Status NormalizeUTF8Op::Compute(const std::shared_ptr<Tensor> &input, std::share
   for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) {
     icu::StringByteSink<std::string> sink(&strs[i++]);
     normalize->normalizeUTF8(0, icu::StringPiece((*iter).data(), (*iter).size()), sink, nullptr, error);
-    CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "normalizeUTF8 failed.");
+    CHECK_FAIL_RETURN_UNEXPECTED(error.isSuccess(), "NormalizeUTF8 failed.");
   }
   return Tensor::CreateFromVector(strs, input->shape(), output);
 }

@@ -25,7 +25,7 @@ namespace dataset {
 
 Status RegexReplaceOp::RegexReplace(icu::RegexMatcher *const matcher, const std::string_view &text,
                                     std::string *out) const {
-  CHECK_FAIL_RETURN_UNEXPECTED((matcher != nullptr && out != nullptr), "Input is null");
+  CHECK_FAIL_RETURN_UNEXPECTED((matcher != nullptr && out != nullptr), "Input is null.");
   UErrorCode icu_error = U_ZERO_ERROR;
   icu::UnicodeString unicode_text = icu::UnicodeString::fromUTF8(text);
   matcher->reset(unicode_text);

@@ -35,17 +35,18 @@ Status RegexReplaceOp::RegexReplace(icu::RegexMatcher *const matcher, const std:
   } else {
     unicode_out = matcher->replaceFirst(replace_, icu_error);
   }
-  CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "RegexReplace failed");
+  CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "RegexReplace failed.");
   unicode_out.toUTF8String(*out);
   return Status::OK();
 }
 
 Status RegexReplaceOp::Compute(const std::shared_ptr<Tensor> &input, std::shared_ptr<Tensor> *output) {
   IO_CHECK(input, output);
-  CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string");
+  CHECK_FAIL_RETURN_UNEXPECTED(input->type() == DataType::DE_STRING, "Input tensor not of type string.");
   UErrorCode icu_error = U_ZERO_ERROR;
   icu::RegexMatcher matcher(pattern_, 0, icu_error);
-  CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error), "Create icu RegexMatcher failed, you may input one error pattern");
+  CHECK_FAIL_RETURN_UNEXPECTED(U_SUCCESS(icu_error),
+                               "Create icu RegexMatcher failed, you may input one error pattern.");
   std::vector<std::string> strs(input->Size());
   int i = 0;
   for (auto iter = input->begin<std::string_view>(); iter != input->end<std::string_view>(); iter++) {

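The RegexReplaceOp hunks above only touch error messages and wrap one long CHECK_FAIL_RETURN_UNEXPECTED call, but they also show the ICU regex idiom the op relies on. The snippet below reassembles those same ICU calls (UnicodeString::fromUTF8, RegexMatcher, reset, replaceFirst, toUTF8String) into a standalone sketch; the pattern and input strings are made up for illustration and stand in for pattern_ and replace_.

// Standalone sketch of the ICU calls used by RegexReplaceOp::RegexReplace above.
// Link against the ICU libraries, e.g. -licui18n -licuuc.
#include <iostream>
#include <string>
#include <unicode/regex.h>
#include <unicode/unistr.h>

int main() {
  UErrorCode icu_error = U_ZERO_ERROR;
  // Hypothetical pattern/replacement, standing in for pattern_ and replace_.
  icu::RegexMatcher matcher(icu::UnicodeString::fromUTF8("\\s+"), 0, icu_error);
  if (U_FAILURE(icu_error)) return 1;  // mirrors "Create icu RegexMatcher failed"

  icu::UnicodeString unicode_text = icu::UnicodeString::fromUTF8("hello   world");
  matcher.reset(unicode_text);
  icu::UnicodeString unicode_out = matcher.replaceFirst(icu::UnicodeString::fromUTF8(" "), icu_error);
  if (U_FAILURE(icu_error)) return 1;  // mirrors "RegexReplace failed."

  std::string out;
  unicode_out.toUTF8String(out);
  std::cout << out << std::endl;  // prints "hello world"
  return 0;
}
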
@@ -56,7 +56,7 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s
   }
 
   if (input->Rank() != 0 || input->type() != DataType::DE_STRING) {
-    RETURN_STATUS_UNEXPECTED("the input tensor should be scalar string tensor");
+    RETURN_STATUS_UNEXPECTED("Input tensor should be scalar string tensor.");
   }
 
   std::string_view sentence_v;

@@ -67,14 +67,14 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s
     std::vector<std::string> pieces;
     auto status = processor_.Encode(sentence, &pieces);
     if (!status.ok()) {
-      RETURN_STATUS_UNEXPECTED("sentence piece tokenizer error");
+      RETURN_STATUS_UNEXPECTED("Sentence piece tokenizer error.");
     }
     RETURN_IF_NOT_OK(Tensor::CreateFromVector(pieces, output));
   } else {
     std::vector<int> ids;
     auto status = processor_.Encode(sentence, &ids);
     if (!status.ok()) {
-      RETURN_STATUS_UNEXPECTED("sentence piece tokenizer error");
+      RETURN_STATUS_UNEXPECTED("Sentence piece tokenizer error.");
     }
     RETURN_IF_NOT_OK(Tensor::CreateFromVector(ids, output));
   }

@@ -84,15 +84,15 @@ Status SentencePieceTokenizerOp::Compute(const std::shared_ptr<Tensor> &input, s
 Status SentencePieceTokenizerOp::GetModelRealPath(const std::string &model_path, const std::string &filename) {
   char real_path[PATH_MAX] = {0};
   if (file_path_.size() >= PATH_MAX) {
-    RETURN_STATUS_UNEXPECTED("sentence piece model path is invalid.");
+    RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid.");
   }
 #if defined(_WIN32) || defined(_WIN64)
   if (_fullpath(real_path, common::SafeCStr(model_path), PATH_MAX) == nullptr) {
-    RETURN_STATUS_UNEXPECTED("sentence piece model path is invalid.");
+    RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid.");
   }
 #else
   if (realpath(common::SafeCStr(model_path), real_path) == nullptr) {
-    RETURN_STATUS_UNEXPECTED("sentence piece model path is invalid.");
+    RETURN_STATUS_UNEXPECTED("Sentence piece model path is invalid.");
   }
 #endif
   std::string abs_path = real_path;

@@ -29,7 +29,7 @@ Status TruncateSequencePairOp::Compute(const TensorRow &input, TensorRow *output
   std::shared_ptr<Tensor> seq1 = input[0];
   std::shared_ptr<Tensor> seq2 = input[1];
   CHECK_FAIL_RETURN_UNEXPECTED(seq1->shape().Rank() == 1 && seq2->shape().Rank() == 1,
-                               "Both sequences should be of rank 1");
+                               "Both sequences should be of rank 1.");
   dsize_t length1 = seq1->shape()[0];
   dsize_t length2 = seq2->shape()[0];
   dsize_t outLength1 = length1;

@@ -31,9 +31,9 @@ const bool UnicodeCharTokenizerOp::kDefWithOffsets = false;
 
 Status UnicodeCharTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
   IO_CHECK_VECTOR(input, output);
-  CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor");
+  CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor.");
   if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) {
-    RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor");
+    RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor.");
   }
   std::string_view str;
   RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {}));

@@ -35,9 +35,9 @@ const bool WhitespaceTokenizerOp::kDefWithOffsets = false;
 
 Status WhitespaceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
   IO_CHECK_VECTOR(input, output);
-  CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor");
+  CHECK_FAIL_RETURN_UNEXPECTED(input.size() == 1, "Input should be one tensor.");
   if (input[0]->Rank() != 0 || input[0]->type() != DataType::DE_STRING) {
-    RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor");
+    RETURN_STATUS_UNEXPECTED("The input tensor should be scalar string tensor.");
   }
   std::string_view str;
   RETURN_IF_NOT_OK(input[0]->GetItemAt(&str, {}));

@@ -117,7 +117,7 @@ Status WordpieceTokenizerOp::GetTokens(const std::string &input_token, const uin
 Status WordpieceTokenizerOp::Compute(const TensorRow &input, TensorRow *output) {
   IO_CHECK_VECTOR(input, output);
   if (input[0]->Rank() > 1 || input[0]->type() != DataType::DE_STRING) {
-    RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor");
+    RETURN_STATUS_UNEXPECTED("The input tensor should be scalar or 1-D string tensor.");
   }
   dsize_t count = 0;
   std::vector<std::string> out_tokens;

@@ -95,9 +95,9 @@ TEST_F(MindDataTestOptimizationPass, MindDataTestOutputShapeAndTypePass) {
   // +- ( 4) <RandomDataOp>: [workers: 4] [total rows: 44]
   //
 
-  // verify that Shuffle and RepeatOp are removed, but Batch and ProjectOp are not
-  EXPECT_EQ(ss_str.find("ShuffleOp"), ss_str.npos);
-  EXPECT_EQ(ss_str.find("RepeatOp"), ss_str.npos);
+  // verify that no ops are removed, but Batch and ProjectOp are not
+  EXPECT_NE(ss_str.find("ShuffleOp"), ss_str.npos);
+  EXPECT_NE(ss_str.find("RepeatOp"), ss_str.npos);
   EXPECT_NE(ss_str.find("ProjectOp"), ss_str.npos);
   EXPECT_NE(ss_str.find("BatchOp"), ss_str.npos);
 }

@@ -129,8 +129,8 @@ TEST_F(MindDataTestOptimizationPass, MindDataTestDatasetSizePass) {
   exe_tree->Print(ss);
   std::string ss_str = ss.str();
 
-  // verify that Shuffle and RepeatOp are removed, but Batch and ProjectOp are not
-  EXPECT_EQ(ss_str.find("ShuffleOp"), ss_str.npos);
+  // verify that no ops are removed, but Batch and ProjectOp are not
+  EXPECT_NE(ss_str.find("ShuffleOp"), ss_str.npos);
   EXPECT_NE(ss_str.find("RepeatOp"), ss_str.npos);
   EXPECT_NE(ss_str.find("ProjectOp"), ss_str.npos);
   EXPECT_NE(ss_str.find("BatchOp"), ss_str.npos);

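The two test hunks above flip EXPECT_EQ to EXPECT_NE on std::string::find against npos: since the getter pass no longer removes ShuffleOp and RepeatOp from the tree, the printed tree dump is now expected to still contain those op names. For reference, a small standalone illustration of that find/npos idiom, using plain asserts instead of gtest and a made-up tree dump string:

// find() == npos means the substring is absent; find() != npos means it is present.
// EXPECT_EQ(ss_str.find("ShuffleOp"), ss_str.npos) asserted absence; the new
// EXPECT_NE asserts presence.
#include <cassert>
#include <string>

int main() {
  std::string tree_dump = "ProjectOp\nBatchOp\nShuffleOp\nRepeatOp\nRandomDataOp";
  assert(tree_dump.find("ShuffleOp") != std::string::npos);  // op still in the tree
  assert(tree_dump.find("MapOp") == std::string::npos);      // op not in the tree
  return 0;
}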