forked from mindspore-Ecosystem/mindspore
codex && code review
This commit is contained in:
parent
cdaaf38206
commit
091d88849d
|
@ -28,6 +28,7 @@ PluginIntf *ProfilingEngineImpl::CreatePlugin() {
|
|||
int ProfilingEngineImpl::ReleasePlugin(PluginIntf *plugin) {
|
||||
if (plugin != nullptr) {
|
||||
delete plugin;
|
||||
plugin = nullptr;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -15,11 +15,8 @@
|
|||
*/
|
||||
|
||||
#include "device/ascend/profiling/profiling_manager.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <vector>
|
||||
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "securec/include/securec.h"
|
||||
#include "./prof_mgr_core.h"
|
||||
#include "device/ascend/profiling/plugin_impl.h"
|
||||
|
@ -30,9 +27,6 @@
|
|||
#include "utils/convert_utils.h"
|
||||
#include "runtime/base.h"
|
||||
|
||||
using std::vector;
|
||||
using Json = nlohmann::json;
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
|
@ -124,35 +118,43 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) {
|
|||
auto context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context);
|
||||
const string prof_options_str = context->profiling_options();
|
||||
vector<string> opts = Split(prof_options_str, ':');
|
||||
std::vector<string> opts = Split(prof_options_str, ':');
|
||||
if (opts.empty()) {
|
||||
MS_LOG(WARNING) << "Profiling is enabled, but profiling option is not set!";
|
||||
return true;
|
||||
}
|
||||
// current one docker only use one device`
|
||||
Json p_device;
|
||||
nlohmann::json p_device;
|
||||
// JOBID
|
||||
auto job_id = GetJobId();
|
||||
p_device["jobID"] = std::to_string(job_id);
|
||||
// device_id
|
||||
p_device["deviceID"] = std::to_string(device_id);
|
||||
// features:'training_trace', 'task_trace' etc
|
||||
Json features;
|
||||
for (vector<string>::size_type i = 0; i < opts.size(); i++) {
|
||||
Json f;
|
||||
nlohmann::json features;
|
||||
for (std::vector<string>::size_type i = 0; i < opts.size(); i++) {
|
||||
nlohmann::json f;
|
||||
f["name"] = opts[i];
|
||||
features[i] = f;
|
||||
}
|
||||
p_device["features"] = features;
|
||||
// only one device, but sProfMgrStartUp API require for device list
|
||||
Json devices;
|
||||
nlohmann::json devices;
|
||||
devices[0] = p_device;
|
||||
Json startCfg;
|
||||
nlohmann::json startCfg;
|
||||
startCfg["startCfg"] = devices;
|
||||
|
||||
if (!ProfStartUp(NOT_NULL(&startCfg))) {
|
||||
MS_LOG(ERROR) << "ProfMgrStartUp failed.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ProfilingManager::ProfStartUp(NotNull<nlohmann::json *> startCfg) {
|
||||
// convert json to string
|
||||
std::stringstream ss;
|
||||
ss << startCfg;
|
||||
ss << *startCfg;
|
||||
std::string cfg = ss.str();
|
||||
MS_LOG(INFO) << "profiling config " << cfg;
|
||||
auto ret = rtProfilerStart();
|
||||
|
|
|
@ -20,18 +20,15 @@
|
|||
#include <cstring>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "utils/contract.h"
|
||||
#include "utils/context/ms_context.h"
|
||||
|
||||
using std::map;
|
||||
using std::string;
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
// PROFILING_CUSTOM_LOGID_START 3
|
||||
const uint64_t kProfilingFpStartLogId = 1;
|
||||
const uint64_t kProfilingBpEndLogId = 2;
|
||||
const uint64_t kProfilingIterEndLogId = 255;
|
||||
|
||||
class ProfilingEngineImpl;
|
||||
class ProfilingManager {
|
||||
public:
|
||||
|
@ -52,6 +49,7 @@ class ProfilingManager {
|
|||
~ProfilingManager() { prof_handle_ = nullptr; }
|
||||
|
||||
private:
|
||||
bool ProfStartUp(NotNull<nlohmann::json *> json);
|
||||
std::shared_ptr<ProfilingEngineImpl> engine_0_;
|
||||
uint32_t device_id_;
|
||||
void *prof_handle_;
|
||||
|
|
|
@ -33,6 +33,10 @@ constexpr char kCustomNode[] = "PROFILING_CUSTOM_";
|
|||
constexpr char kFpStartNode[] = "PROFILING_FP_START";
|
||||
constexpr char kBpEndNode[] = "PROFILING_BP_END";
|
||||
constexpr char kIterEndNode[] = "PROFILING_ITER_END";
|
||||
// PROFILING_CUSTOM_LOGID_START 3
|
||||
constexpr uint64_t kProfilingFpStartLogId = 1;
|
||||
constexpr uint64_t kProfilingBpEndLogId = 2;
|
||||
constexpr uint64_t kProfilingIterEndLogId = 255;
|
||||
std::map<uint32_t, std::vector<CNodePtr>> ProfilingUtils::graph_profiling_cnode_;
|
||||
std::map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_;
|
||||
std::map<uint32_t, std::vector<std::shared_ptr<ProfDesc>>> ProfilingUtils::graph_point_;
|
||||
|
|
|
@ -58,9 +58,9 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
|
|||
if (task_info->hccl_type() == kBroadcastOpName) {
|
||||
// call hcom broadcast interface to run op
|
||||
const string tag_broadcast = kHcomBroadcast + std::to_string(task_counter++) + kUnderline + std::to_string(0);
|
||||
ret = hcom_broadcast(tag_broadcast.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
|
||||
static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
|
||||
static_cast<u32>(task_info->root_id()), hccl_group.c_str(), stream);
|
||||
ret = hcom_broadcast(tag_broadcast.c_str(), task_info->input_data_addr(), static_cast<u64>(task_info->count()),
|
||||
static_cast<hcclDataType_t>(task_info->data_type()), static_cast<u32>(task_info->root_id()),
|
||||
hccl_group.c_str(), stream);
|
||||
if (ret != HCCL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "hcom_broadcast fail, return ret: " << static_cast<int>(ret);
|
||||
return false;
|
||||
|
@ -68,9 +68,9 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
|
|||
} else if (task_info->hccl_type() == kAllGatherOpName) {
|
||||
// call hcom allgather interface to run op
|
||||
const string tag_all_gather = kHcomAllGather + std::to_string(task_counter++) + kUnderline + std::to_string(0);
|
||||
ret = hcom_all_gather(tag_all_gather.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
|
||||
reinterpret_cast<void *>(task_info->output_data_addr()), static_cast<u64>(task_info->count()),
|
||||
static_cast<hcclDataType_t>(task_info->data_type()), hccl_group.c_str(), stream);
|
||||
ret = hcom_all_gather(tag_all_gather.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
|
||||
static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
|
||||
hccl_group.c_str(), stream);
|
||||
if (ret != HCCL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "hcom_all_gather fail, return ret: " << ret;
|
||||
return false;
|
||||
|
@ -78,9 +78,8 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
|
|||
} else if (task_info->hccl_type() == kAllReduceOpName) {
|
||||
// call hcom allreduce interface to run op
|
||||
const string tag_all_reduce = kHcomAllReduce + std::to_string(task_counter++) + kUnderline + std::to_string(0);
|
||||
ret = hcom_all_reduce(tag_all_reduce.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
|
||||
reinterpret_cast<void *>(task_info->output_data_addr()), static_cast<u64>(task_info->count()),
|
||||
static_cast<hcclDataType_t>(task_info->data_type()),
|
||||
ret = hcom_all_reduce(tag_all_reduce.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
|
||||
static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
|
||||
static_cast<hcclRedOp_t>(task_info->op_type()), hccl_group.c_str(), stream);
|
||||
if (ret != HCCL_SUCCESS) {
|
||||
MS_LOG(ERROR) << "hcom_all_reduce fail, return ret: " << ret;
|
||||
|
@ -90,8 +89,7 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
|
|||
// call hcom reducescatter interface to run op
|
||||
const string tag_reduce_scatter =
|
||||
kHcomReduceScatter + std::to_string(task_counter++) + kUnderline + std::to_string(0);
|
||||
ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
|
||||
reinterpret_cast<void *>(task_info->output_data_addr()),
|
||||
ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
|
||||
static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
|
||||
static_cast<hcclRedOp_t>(task_info->op_type()), hccl_group.c_str(), stream);
|
||||
if (ret != HCCL_SUCCESS) {
|
||||
|
|
|
@ -40,9 +40,9 @@ bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::ve
|
|||
return true;
|
||||
}
|
||||
|
||||
void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) {
|
||||
void TaskGenerator::LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node_ptr);
|
||||
if (anf_node_ptr->inputs().size() != 2) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_inputs);
|
||||
// akg process
|
||||
// set atomic clean addr
|
||||
if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, anf_node_ptr)) {
|
||||
|
@ -66,13 +66,20 @@ void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressP
|
|||
for (auto index : clean_output_indexs) {
|
||||
auto device_address = AnfAlgo::GetOutputAddr(post_node, index);
|
||||
kernel::AddressPtr input = std::make_shared<kernel::Address>();
|
||||
MS_EXCEPTION_IF_NULL(input);
|
||||
input->addr = device_address->ptr_;
|
||||
MS_EXCEPTION_IF_NULL(input->addr);
|
||||
input->size = device_address->size_;
|
||||
kernel_inputs->push_back(input);
|
||||
}
|
||||
MS_LOG(DEBUG) << "AtomicAddClean clean output size: " << clean_output_indexs.size();
|
||||
}
|
||||
}
|
||||
|
||||
void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node_ptr);
|
||||
MS_EXCEPTION_IF_NULL(kernel_inputs);
|
||||
if (anf_node_ptr->inputs().size() != 2) {
|
||||
LaunchAddrCleanAkgKernel(anf_node_ptr, kernel_inputs);
|
||||
return;
|
||||
}
|
||||
MS_EXCEPTION_IF_NULL(anf_node_ptr->inputs()[1]);
|
||||
|
|
|
@ -48,6 +48,7 @@ class TaskGenerator {
|
|||
|
||||
private:
|
||||
static void LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs);
|
||||
static void LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs);
|
||||
static bool LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_id, std::vector<TaskInfoPtr> *task_info_list);
|
||||
static bool LaunchAllKernel(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *task_info_list,
|
||||
uint32_t graph_id);
|
||||
|
|
|
@ -79,6 +79,7 @@ void CPUKernelRuntime::AssignValueNodeAddress(session::KernelGraph *kernel_graph
|
|||
std::vector<int> data_shape = tensor->shape();
|
||||
size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>());
|
||||
DeviceAddressPtr address = CreateDeviceAddress(nullptr, tensor_size, kOpFormat_DEFAULT, kNumberTypeFloat32);
|
||||
MS_EXCEPTION_IF_NULL(address);
|
||||
if (tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32) {
|
||||
address->ptr_ = tensor->data_c(false);
|
||||
} else {
|
||||
|
|
Loading…
Reference in New Issue