codex && code review

jojobugfree 2020-06-23 23:00:32 +08:00 committed by caifubi
parent cdaaf38206
commit 091d88849d
8 changed files with 73 additions and 61 deletions

View File

@@ -28,6 +28,7 @@ PluginIntf *ProfilingEngineImpl::CreatePlugin() {
 int ProfilingEngineImpl::ReleasePlugin(PluginIntf *plugin) {
   if (plugin != nullptr) {
     delete plugin;
+    plugin = nullptr;
   }
   return 0;
 }
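Review note: the added `plugin = nullptr;` follows the delete-and-null convention that guards against accidental reuse after deletion. One subtlety: because `plugin` is passed by value, only the callee's copy is nulled, so the caller's pointer still dangles after ReleasePlugin returns. A minimal sketch of the distinction, using a hypothetical `Plugin` type that is not from this commit:

struct Plugin {};  // hypothetical stand-in

// Nulling a by-value pointer clears only the local copy.
int ReleaseByValue(Plugin *plugin) {
  if (plugin != nullptr) {
    delete plugin;
    plugin = nullptr;  // caller's pointer is unchanged
  }
  return 0;
}

// Taking the pointer by reference clears it at the call site as well.
void ReleaseByReference(Plugin *&plugin) {
  delete plugin;  // delete on a null pointer is a no-op, so no guard is needed
  plugin = nullptr;
}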

View File

@@ -15,11 +15,8 @@
  */
 #include "device/ascend/profiling/profiling_manager.h"
 #include <stdlib.h>
 #include <vector>
-#include <nlohmann/json.hpp>
 #include "securec/include/securec.h"
 #include "./prof_mgr_core.h"
 #include "device/ascend/profiling/plugin_impl.h"
@@ -30,9 +27,6 @@
 #include "utils/convert_utils.h"
 #include "runtime/base.h"
-using std::vector;
-using Json = nlohmann::json;
 namespace mindspore {
 namespace device {
 namespace ascend {
@@ -124,35 +118,43 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) {
   auto context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context);
   const string prof_options_str = context->profiling_options();
-  vector<string> opts = Split(prof_options_str, ':');
+  std::vector<string> opts = Split(prof_options_str, ':');
   if (opts.empty()) {
     MS_LOG(WARNING) << "Profiling is enabled, but profiling option is not set!";
     return true;
   }
   // current one docker only use one device`
-  Json p_device;
+  nlohmann::json p_device;
   // JOBID
   auto job_id = GetJobId();
   p_device["jobID"] = std::to_string(job_id);
   // device_id
   p_device["deviceID"] = std::to_string(device_id);
   // features:'training_trace', 'task_trace' etc
-  Json features;
-  for (vector<string>::size_type i = 0; i < opts.size(); i++) {
-    Json f;
+  nlohmann::json features;
+  for (std::vector<string>::size_type i = 0; i < opts.size(); i++) {
+    nlohmann::json f;
     f["name"] = opts[i];
     features[i] = f;
   }
   p_device["features"] = features;
   // only one device, but sProfMgrStartUp API require for device list
-  Json devices;
+  nlohmann::json devices;
   devices[0] = p_device;
-  Json startCfg;
+  nlohmann::json startCfg;
   startCfg["startCfg"] = devices;
+  if (!ProfStartUp(NOT_NULL(&startCfg))) {
+    MS_LOG(ERROR) << "ProfMgrStartUp failed.";
+    return false;
+  }
+  return true;
+}
+
+bool ProfilingManager::ProfStartUp(NotNull<nlohmann::json *> startCfg) {
   // convert json to string
   std::stringstream ss;
-  ss << startCfg;
+  ss << *startCfg;
   std::string cfg = ss.str();
   MS_LOG(INFO) << "profiling config " << cfg;
   auto ret = rtProfilerStart();
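Review note: StartupProfiling now only assembles the configuration and delegates the actual startup call to the new ProfStartUp. The JSON it builds is a `startCfg` array holding one per-device entry. A self-contained sketch of the same structure, with illustrative option and ID values:

#include <iostream>
#include <string>
#include <vector>
#include <nlohmann/json.hpp>

int main() {
  std::vector<std::string> opts = {"training_trace", "task_trace"};

  nlohmann::json p_device;
  p_device["jobID"] = "255";
  p_device["deviceID"] = "0";

  nlohmann::json features;
  for (std::vector<std::string>::size_type i = 0; i < opts.size(); i++) {
    nlohmann::json f;
    f["name"] = opts[i];
    features[i] = f;  // indexing a null json converts it to an array
  }
  p_device["features"] = features;

  nlohmann::json devices;
  devices[0] = p_device;  // API expects a device list, even for one device

  nlohmann::json start_cfg;
  start_cfg["startCfg"] = devices;

  // Prints (keys sorted alphabetically by nlohmann::json):
  // {"startCfg":[{"deviceID":"0","features":[{"name":"training_trace"},{"name":"task_trace"}],"jobID":"255"}]}
  std::cout << start_cfg.dump() << std::endl;
  return 0;
}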

View File

@@ -20,18 +20,15 @@
 #include <cstring>
 #include <string>
 #include <memory>
+#include <nlohmann/json.hpp>
+#include "utils/contract.h"
 #include "utils/context/ms_context.h"
 using std::map;
 using std::string;
 namespace mindspore {
 namespace device {
 namespace ascend {
-// PROFILING_CUSTOM_LOGID_START 3
-const uint64_t kProfilingFpStartLogId = 1;
-const uint64_t kProfilingBpEndLogId = 2;
-const uint64_t kProfilingIterEndLogId = 255;
 class ProfilingEngineImpl;
 class ProfilingManager {
  public:
@@ -52,6 +49,7 @@ class ProfilingManager {
   ~ProfilingManager() { prof_handle_ = nullptr; }
  private:
+  bool ProfStartUp(NotNull<nlohmann::json *> json);
   std::shared_ptr<ProfilingEngineImpl> engine_0_;
   uint32_t device_id_;
   void *prof_handle_;
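Review note: the new private ProfStartUp takes NotNull<nlohmann::json *> (hence the added utils/contract.h include), moving the non-null guarantee into the signature rather than a runtime check inside the function body. A simplified sketch of how such a wrapper can work; MindSpore's actual contract utilities may differ:

#include <cstdlib>

// Simplified stand-in for a NotNull-style contract wrapper.
template <typename T>
class NotNull {
 public:
  explicit NotNull(T ptr) : ptr_(ptr) {
    if (ptr_ == nullptr) {
      std::abort();  // contract violation: reject null at construction
    }
  }
  T get() const { return ptr_; }
  auto &operator*() const { return *ptr_; }
  T operator->() const { return ptr_; }

 private:
  T ptr_;
};

// Callers state the precondition explicitly at the call site.
#define NOT_NULL(ptr) NotNull<decltype(ptr)>(ptr)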

View File

@@ -33,6 +33,10 @@ constexpr char kCustomNode[] = "PROFILING_CUSTOM_";
 constexpr char kFpStartNode[] = "PROFILING_FP_START";
 constexpr char kBpEndNode[] = "PROFILING_BP_END";
 constexpr char kIterEndNode[] = "PROFILING_ITER_END";
+// PROFILING_CUSTOM_LOGID_START 3
+constexpr uint64_t kProfilingFpStartLogId = 1;
+constexpr uint64_t kProfilingBpEndLogId = 2;
+constexpr uint64_t kProfilingIterEndLogId = 255;
 std::map<uint32_t, std::vector<CNodePtr>> ProfilingUtils::graph_profiling_cnode_;
 std::map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_;
 std::map<uint32_t, std::vector<std::shared_ptr<ProfDesc>>> ProfilingUtils::graph_point_;
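Review note: moving these log-ID constants out of profiling_manager.h and into the .cc that uses them keeps them out of every translation unit that includes the header, and constexpr guarantees compile-time values. A small illustration of the pattern (file and symbol names here are hypothetical):

#include <cstdint>

namespace {
// Internal linkage: these constants are visible only in this .cc file,
// and constexpr makes them usable in compile-time contexts.
constexpr uint64_t kExampleFpStartLogId = 1;
constexpr uint64_t kExampleIterEndLogId = 255;
}  // namespace

uint64_t SelectLogId(bool is_fp_start) {
  return is_fp_start ? kExampleFpStartLogId : kExampleIterEndLogId;
}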

View File

@@ -58,9 +58,9 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
   if (task_info->hccl_type() == kBroadcastOpName) {
     // call hcom broadcast interface to run op
     const string tag_broadcast = kHcomBroadcast + std::to_string(task_counter++) + kUnderline + std::to_string(0);
-    ret = hcom_broadcast(tag_broadcast.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
-                         static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
-                         static_cast<u32>(task_info->root_id()), hccl_group.c_str(), stream);
+    ret = hcom_broadcast(tag_broadcast.c_str(), task_info->input_data_addr(), static_cast<u64>(task_info->count()),
+                         static_cast<hcclDataType_t>(task_info->data_type()), static_cast<u32>(task_info->root_id()),
+                         hccl_group.c_str(), stream);
     if (ret != HCCL_SUCCESS) {
       MS_LOG(ERROR) << "hcom_broadcast fail, return ret: " << static_cast<int>(ret);
       return false;
@@ -68,9 +68,9 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
   } else if (task_info->hccl_type() == kAllGatherOpName) {
     // call hcom allgather interface to run op
     const string tag_all_gather = kHcomAllGather + std::to_string(task_counter++) + kUnderline + std::to_string(0);
-    ret = hcom_all_gather(tag_all_gather.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
-                          reinterpret_cast<void *>(task_info->output_data_addr()), static_cast<u64>(task_info->count()),
-                          static_cast<hcclDataType_t>(task_info->data_type()), hccl_group.c_str(), stream);
+    ret = hcom_all_gather(tag_all_gather.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
+                          static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
+                          hccl_group.c_str(), stream);
     if (ret != HCCL_SUCCESS) {
       MS_LOG(ERROR) << "hcom_all_gather fail, return ret: " << ret;
       return false;
@@ -78,9 +78,8 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
   } else if (task_info->hccl_type() == kAllReduceOpName) {
     // call hcom allreduce interface to run op
     const string tag_all_reduce = kHcomAllReduce + std::to_string(task_counter++) + kUnderline + std::to_string(0);
-    ret = hcom_all_reduce(tag_all_reduce.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
-                          reinterpret_cast<void *>(task_info->output_data_addr()), static_cast<u64>(task_info->count()),
-                          static_cast<hcclDataType_t>(task_info->data_type()),
+    ret = hcom_all_reduce(tag_all_reduce.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
+                          static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
                           static_cast<hcclRedOp_t>(task_info->op_type()), hccl_group.c_str(), stream);
     if (ret != HCCL_SUCCESS) {
       MS_LOG(ERROR) << "hcom_all_reduce fail, return ret: " << ret;
@@ -90,8 +89,7 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
     // call hcom reducescatter interface to run op
     const string tag_reduce_scatter =
       kHcomReduceScatter + std::to_string(task_counter++) + kUnderline + std::to_string(0);
-    ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
-                              reinterpret_cast<void *>(task_info->output_data_addr()),
+    ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
                               static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
                               static_cast<hcclRedOp_t>(task_info->op_type()), hccl_group.c_str(), stream);
     if (ret != HCCL_SUCCESS) {
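Review note: the same cleanup recurs at all four call sites; the reinterpret_cast<void *> wrappers were redundant because the address getters already return values convertible to the void * parameters, and dropping the casts restores normal type checking. A reduced before/after sketch with hypothetical stand-ins for the getter and the HCCL entry point:

#include <cstdint>

using u64 = uint64_t;

// Hypothetical stand-ins; the real signatures live in the HCCL headers.
struct TaskInfoStub {
  void *input_data_addr() const { return input_; }
  void *input_ = nullptr;
};

int hcom_call_stub(const char *tag, void *input, u64 count) { return 0; }

int Dispatch(const TaskInfoStub &info, u64 count) {
  // Before: hcom_call_stub("tag_0", reinterpret_cast<void *>(info.input_data_addr()), count);
  // After: the value is already a void *, so the cast adds nothing but risk.
  return hcom_call_stub("tag_0", info.input_data_addr(), count);
}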

View File

@@ -40,9 +40,9 @@ bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::ve
   return true;
 }
-void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) {
+void TaskGenerator::LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) {
   MS_EXCEPTION_IF_NULL(anf_node_ptr);
-  if (anf_node_ptr->inputs().size() != 2) {
+  MS_EXCEPTION_IF_NULL(kernel_inputs);
   // akg process
   // set atomic clean addr
   if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, anf_node_ptr)) {
@@ -66,13 +66,20 @@ void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressP
     for (auto index : clean_output_indexs) {
       auto device_address = AnfAlgo::GetOutputAddr(post_node, index);
       kernel::AddressPtr input = std::make_shared<kernel::Address>();
+      MS_EXCEPTION_IF_NULL(input);
       input->addr = device_address->ptr_;
+      MS_EXCEPTION_IF_NULL(input->addr);
       input->size = device_address->size_;
       kernel_inputs->push_back(input);
     }
     MS_LOG(DEBUG) << "AtomicAddClean clean output size: " << clean_output_indexs.size();
   }
+}
+
+void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) {
+  MS_EXCEPTION_IF_NULL(anf_node_ptr);
+  MS_EXCEPTION_IF_NULL(kernel_inputs);
+  if (anf_node_ptr->inputs().size() != 2) {
+    LaunchAddrCleanAkgKernel(anf_node_ptr, kernel_inputs);
     return;
   }
   MS_EXCEPTION_IF_NULL(anf_node_ptr->inputs()[1]);
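Review note: the original LaunchAddrCleanKernel folded the AKG path into its early-exit branch; this change extracts it into LaunchAddrCleanAkgKernel so each path checks its own preconditions and the dispatcher stays small. A reduced sketch of the resulting shape, with simplified stand-in types:

#include <cassert>
#include <vector>

// Simplified stand-ins for CNodePtr and AddressPtrList.
struct Node { std::vector<int> inputs; };
using AddressList = std::vector<void *>;

void LaunchAkgClean(const Node *node, AddressList *inputs) {
  assert(node != nullptr && inputs != nullptr);
  // ... AKG-specific atomic-clean address collection ...
}

void LaunchAddrClean(const Node *node, AddressList *inputs) {
  assert(node != nullptr && inputs != nullptr);
  if (node->inputs.size() != 2) {
    // Non-TBE (AKG) nodes take the dedicated path.
    LaunchAkgClean(node, inputs);
    return;
  }
  // ... TBE path: read clean addresses from the single real input ...
}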

View File

@@ -48,6 +48,7 @@ class TaskGenerator {
  private:
   static void LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs);
+  static void LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs);
   static bool LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_id, std::vector<TaskInfoPtr> *task_info_list);
   static bool LaunchAllKernel(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *task_info_list,
                               uint32_t graph_id);

View File

@@ -79,6 +79,7 @@ void CPUKernelRuntime::AssignValueNodeAddress(session::KernelGraph *kernel_graph
   std::vector<int> data_shape = tensor->shape();
   size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>());
   DeviceAddressPtr address = CreateDeviceAddress(nullptr, tensor_size, kOpFormat_DEFAULT, kNumberTypeFloat32);
+  MS_EXCEPTION_IF_NULL(address);
   if (tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32) {
     address->ptr_ = tensor->data_c(false);
   } else {
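Review note on the surrounding code: tensor_size here is the product of the shape dimensions seeded with the per-element size, so a null check on the freshly created device address is the only thing this hunk needs to add. For reference, the size computation works like this:

#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> data_shape = {2, 3, 4};  // e.g. a 2x3x4 tensor
  size_t type_size = sizeof(float);         // 4 bytes per element

  // Seed with the element size, then multiply through the dimensions:
  // 4 * 2 * 3 * 4 = 96 bytes.
  size_t tensor_size =
      std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>());

  std::cout << tensor_size << std::endl;  // prints 96
  return 0;
}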