!14907 Address codex warning for CLUENode::CreateKeyMapForBuild() r1.2 branch

From: @lixiachen
Reviewed-by: @robingrosman,@liucunwei
Signed-off-by: @liucunwei
This commit is contained in:
mindspore-ci-bot 2021-04-10 16:35:16 +08:00 committed by Gitee
commit 1766f12888
2 changed files with 98 additions and 79 deletions

View File

@ -83,88 +83,87 @@ std::vector<std::string> CLUENode::split(const std::string &s, char delim) {
return res; return res;
} }
std::map<std::string, std::string> CLUENode::CreateKeyMapForBuild() { std::map<std::string, std::string> CLUENode::CreateKeyMapForAFQMCOrCMNLITask() {
std::map<std::string, std::string> key_map; std::map<std::string, std::string> key_map;
if (task_ == "AFQMC") { if (usage_ == "train" || usage_ == "eval") {
if (usage_ == "train" || usage_ == "eval") { key_map["label"] = "label";
key_map["sentence1"] = "sentence1"; } else { // usage_ == "test"
key_map["sentence2"] = "sentence2"; key_map["id"] = "id";
key_map["label"] = "label";
} else { // usage_ == "test"
key_map["id"] = "id";
key_map["sentence1"] = "sentence1";
key_map["sentence2"] = "sentence2";
}
} }
if (task_ == "CMNLI") { key_map["sentence1"] = "sentence1";
if (usage_ == "train" || usage_ == "eval") { key_map["sentence2"] = "sentence2";
key_map["sentence1"] = "sentence1"; return key_map;
key_map["sentence2"] = "sentence2"; }
key_map["label"] = "label";
} else { // usage_ == "test" std::map<std::string, std::string> CLUENode::CreateKeyMapForCSLTask() {
key_map["id"] = "id"; std::map<std::string, std::string> key_map;
key_map["sentence1"] = "sentence1"; if (usage_ == "train" || usage_ == "eval") {
key_map["sentence2"] = "sentence2"; key_map["label"] = "label";
}
} }
if (task_ == "CSL") { key_map["id"] = "id";
if (usage_ == "train" || usage_ == "eval") { key_map["abst"] = "abst";
key_map["id"] = "id"; key_map["keyword"] = "keyword";
key_map["abst"] = "abst"; return key_map;
key_map["keyword"] = "keyword"; }
key_map["label"] = "label";
} else { // usage_ == "test" std::map<std::string, std::string> CLUENode::CreateKeyMapForIFLYTEKTask() {
key_map["id"] = "id"; std::map<std::string, std::string> key_map;
key_map["abst"] = "abst"; if (usage_ == "train" || usage_ == "eval") {
key_map["keyword"] = "keyword"; key_map["label"] = "label";
} key_map["label_des"] = "label_des";
} else { // usage_ == "test"
key_map["id"] = "id";
} }
if (task_ == "IFLYTEK") { key_map["sentence"] = "sentence";
if (usage_ == "train" || usage_ == "eval") { return key_map;
key_map["label"] = "label"; }
key_map["label_des"] = "label_des";
key_map["sentence"] = "sentence"; std::map<std::string, std::string> CLUENode::CreateKeyMapForTNEWSTask() {
} else { // usage_ == "test" std::map<std::string, std::string> key_map;
key_map["id"] = "id"; if (usage_ == "train" || usage_ == "eval") {
key_map["sentence"] = "sentence"; key_map["label"] = "label";
} key_map["label_desc"] = "label_desc";
} else { // usage_ == "test"
key_map["id"] = "id";
} }
if (task_ == "TNEWS") { key_map["sentence"] = "sentence";
if (usage_ == "train" || usage_ == "eval") { key_map["keywords"] = "keywords";
key_map["label"] = "label"; return key_map;
key_map["label_desc"] = "label_desc"; }
key_map["sentence"] = "sentence";
key_map["keywords"] = "keywords"; std::map<std::string, std::string> CLUENode::CreateKeyMapForWSCTask() {
} else { // usage_ == "test" std::map<std::string, std::string> key_map;
key_map["id"] = "id"; if (usage_ == "train" || usage_ == "eval") {
key_map["sentence"] = "sentence"; key_map["label"] = "label";
key_map["keywords"] = "keywords";
}
} }
if (task_ == "WSC") { key_map["span1_index"] = "target/span1_index";
if (usage_ == "train" || usage_ == "eval") { key_map["span2_index"] = "target/span2_index";
key_map["span1_index"] = "target/span1_index"; key_map["span1_text"] = "target/span1_text";
key_map["span2_index"] = "target/span2_index"; key_map["span2_text"] = "target/span2_text";
key_map["span1_text"] = "target/span1_text"; key_map["idx"] = "idx";
key_map["span2_text"] = "target/span2_text"; key_map["text"] = "text";
key_map["idx"] = "idx"; return key_map;
key_map["label"] = "label"; }
key_map["text"] = "text";
} else { // usage_ == "test" std::map<std::string, std::string> CLUENode::CreateKeyMap() {
key_map["span1_index"] = "target/span1_index"; std::map<std::string, std::string> key_map;
key_map["span2_index"] = "target/span2_index"; if (task_ == "AFQMC" || task_ == "CMNLI") {
key_map["span1_text"] = "target/span1_text"; key_map = CreateKeyMapForAFQMCOrCMNLITask();
key_map["span2_text"] = "target/span2_text"; } else if (task_ == "CSL") {
key_map["idx"] = "idx"; key_map = CreateKeyMapForCSLTask();
key_map["text"] = "text"; } else if (task_ == "IFLYTEK") {
} key_map = CreateKeyMapForIFLYTEKTask();
} else if (task_ == "TNEWS") {
key_map = CreateKeyMapForTNEWSTask();
} else if (task_ == "WSC") {
key_map = CreateKeyMapForWSCTask();
} }
return key_map; return key_map;
} }
// Function to build CLUENode // Function to build CLUENode
Status CLUENode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) { Status CLUENode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
auto key_map = CreateKeyMapForBuild(); auto key_map = CreateKeyMap();
ColKeyMap ck_map; ColKeyMap ck_map;
for (auto &p : key_map) { for (auto &p : key_map) {
ck_map.insert({p.first, split(p.second, '/')}); ck_map.insert({p.first, split(p.second, '/')});
@ -246,11 +245,11 @@ Status CLUENode::to_json(nlohmann::json *out_json) {
return Status::OK(); return Status::OK();
} }
// Note: The following two functions are common among NonMappableSourceNode and should be promoted to its parent class. // Note: The following two functions are common among NonMappableSourceNode and should be promoted to its parent
// CLUE by itself is a non-mappable dataset that does not support sampling. // class. CLUE by itself is a non-mappable dataset that does not support sampling. However, if a cache operator is
// However, if a cache operator is injected at some other place higher in the tree, that cache can // injected at some other place higher in the tree, that cache can inherit this sampler from the leaf, providing
// inherit this sampler from the leaf, providing sampling support from the caching layer. // sampling support from the caching layer. That is why we set up the sampler for a leaf node that does not use
// That is why we set up the sampler for a leaf node that does not use sampling. // sampling.
Status CLUENode::SetupSamplerForCache(std::shared_ptr<SamplerObj> *sampler) { Status CLUENode::SetupSamplerForCache(std::shared_ptr<SamplerObj> *sampler) {
bool shuffle_files = (shuffle_ == ShuffleMode::kGlobal || shuffle_ == ShuffleMode::kFiles); bool shuffle_files = (shuffle_ == ShuffleMode::kGlobal || shuffle_ == ShuffleMode::kFiles);
*sampler = SelectSampler(num_samples_, shuffle_files, num_shards_, shard_id_); *sampler = SelectSampler(num_samples_, shuffle_files, num_shards_, shard_id_);

View File

@ -50,10 +50,6 @@ class CLUENode : public NonMappableSourceNode {
/// \return A shared pointer to the new copy /// \return A shared pointer to the new copy
std::shared_ptr<DatasetNode> Copy() override; std::shared_ptr<DatasetNode> Copy() override;
/// \brief Generate a key map to be used in Build() according to usage and task
/// \return The generated key map
std::map<std::string, std::string> CreateKeyMapForBuild();
/// \brief a base class override function to create the required runtime dataset op objects for this class /// \brief a base class override function to create the required runtime dataset op objects for this class
/// \param node_ops - A vector containing shared pointer to the Dataset Ops that this object will create /// \param node_ops - A vector containing shared pointer to the Dataset Ops that this object will create
/// \return Status Status::OK() if build successfully /// \return Status Status::OK() if build successfully
@ -111,6 +107,30 @@ class CLUENode : public NonMappableSourceNode {
/// \return A string vector /// \return A string vector
std::vector<std::string> split(const std::string &s, char delim); std::vector<std::string> split(const std::string &s, char delim);
/// \brief Generate a key map for AFQMC or CMNLI task according to usage
/// \return The generated key map
std::map<std::string, std::string> CreateKeyMapForAFQMCOrCMNLITask();
/// \brief Generate a key map for CSL task according to usage
/// \return The generated key map
std::map<std::string, std::string> CreateKeyMapForCSLTask();
/// \brief Generate a key map for IFLYTEK task according to usage
/// \return The generated key map
std::map<std::string, std::string> CreateKeyMapForIFLYTEKTask();
/// \brief Generate a key map for TNEWS task according to usage
/// \return The generated key map
std::map<std::string, std::string> CreateKeyMapForTNEWSTask();
/// \brief Generate a key map for WSC task according to usage
/// \return The generated key map
std::map<std::string, std::string> CreateKeyMapForWSCTask();
/// \brief Generate a key map to be used in Build() according to usage and task
/// \return The generated key map
std::map<std::string, std::string> CreateKeyMap();
std::vector<std::string> dataset_files_; std::vector<std::string> dataset_files_;
std::string task_; std::string task_;
std::string usage_; std::string usage_;