codex && code review

This commit is contained in:
jojobugfree 2020-06-23 23:00:32 +08:00 committed by caifubi
parent cdaaf38206
commit 091d88849d
8 changed files with 73 additions and 61 deletions

View File

@ -28,6 +28,7 @@ PluginIntf *ProfilingEngineImpl::CreatePlugin() {
int ProfilingEngineImpl::ReleasePlugin(PluginIntf *plugin) {
if (plugin != nullptr) {
delete plugin;
plugin = nullptr;
}
return 0;
}

View File

@ -15,11 +15,8 @@
*/
#include "device/ascend/profiling/profiling_manager.h"
#include <stdlib.h>
#include <vector>
#include <nlohmann/json.hpp>
#include "securec/include/securec.h"
#include "./prof_mgr_core.h"
#include "device/ascend/profiling/plugin_impl.h"
@ -30,9 +27,6 @@
#include "utils/convert_utils.h"
#include "runtime/base.h"
using std::vector;
using Json = nlohmann::json;
namespace mindspore {
namespace device {
namespace ascend {
@ -124,35 +118,43 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) {
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
const string prof_options_str = context->profiling_options();
vector<string> opts = Split(prof_options_str, ':');
std::vector<string> opts = Split(prof_options_str, ':');
if (opts.empty()) {
MS_LOG(WARNING) << "Profiling is enabled, but profiling option is not set!";
return true;
}
// current one docker only use one device`
Json p_device;
nlohmann::json p_device;
// JOBID
auto job_id = GetJobId();
p_device["jobID"] = std::to_string(job_id);
// device_id
p_device["deviceID"] = std::to_string(device_id);
// features:'training_trace', 'task_trace' etc
Json features;
for (vector<string>::size_type i = 0; i < opts.size(); i++) {
Json f;
nlohmann::json features;
for (std::vector<string>::size_type i = 0; i < opts.size(); i++) {
nlohmann::json f;
f["name"] = opts[i];
features[i] = f;
}
p_device["features"] = features;
// only one device, but sProfMgrStartUp API require for device list
Json devices;
nlohmann::json devices;
devices[0] = p_device;
Json startCfg;
nlohmann::json startCfg;
startCfg["startCfg"] = devices;
if (!ProfStartUp(NOT_NULL(&startCfg))) {
MS_LOG(ERROR) << "ProfMgrStartUp failed.";
return false;
}
return true;
}
bool ProfilingManager::ProfStartUp(NotNull<nlohmann::json *> startCfg) {
// convert json to string
std::stringstream ss;
ss << startCfg;
ss << *startCfg;
std::string cfg = ss.str();
MS_LOG(INFO) << "profiling config " << cfg;
auto ret = rtProfilerStart();

View File

@ -20,18 +20,15 @@
#include <cstring>
#include <string>
#include <memory>
#include <nlohmann/json.hpp>
#include "utils/contract.h"
#include "utils/context/ms_context.h"
using std::map;
using std::string;
namespace mindspore {
namespace device {
namespace ascend {
// PROFILING_CUSTOM_LOGID_START 3
const uint64_t kProfilingFpStartLogId = 1;
const uint64_t kProfilingBpEndLogId = 2;
const uint64_t kProfilingIterEndLogId = 255;
class ProfilingEngineImpl;
class ProfilingManager {
public:
@ -52,6 +49,7 @@ class ProfilingManager {
~ProfilingManager() { prof_handle_ = nullptr; }
private:
bool ProfStartUp(NotNull<nlohmann::json *> json);
std::shared_ptr<ProfilingEngineImpl> engine_0_;
uint32_t device_id_;
void *prof_handle_;

View File

@ -33,6 +33,10 @@ constexpr char kCustomNode[] = "PROFILING_CUSTOM_";
constexpr char kFpStartNode[] = "PROFILING_FP_START";
constexpr char kBpEndNode[] = "PROFILING_BP_END";
constexpr char kIterEndNode[] = "PROFILING_ITER_END";
// PROFILING_CUSTOM_LOGID_START 3
constexpr uint64_t kProfilingFpStartLogId = 1;
constexpr uint64_t kProfilingBpEndLogId = 2;
constexpr uint64_t kProfilingIterEndLogId = 255;
std::map<uint32_t, std::vector<CNodePtr>> ProfilingUtils::graph_profiling_cnode_;
std::map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_;
std::map<uint32_t, std::vector<std::shared_ptr<ProfDesc>>> ProfilingUtils::graph_point_;

View File

@ -58,9 +58,9 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
if (task_info->hccl_type() == kBroadcastOpName) {
// call hcom broadcast interface to run op
const string tag_broadcast = kHcomBroadcast + std::to_string(task_counter++) + kUnderline + std::to_string(0);
ret = hcom_broadcast(tag_broadcast.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
static_cast<u32>(task_info->root_id()), hccl_group.c_str(), stream);
ret = hcom_broadcast(tag_broadcast.c_str(), task_info->input_data_addr(), static_cast<u64>(task_info->count()),
static_cast<hcclDataType_t>(task_info->data_type()), static_cast<u32>(task_info->root_id()),
hccl_group.c_str(), stream);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "hcom_broadcast fail, return ret: " << static_cast<int>(ret);
return false;
@ -68,9 +68,9 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
} else if (task_info->hccl_type() == kAllGatherOpName) {
// call hcom allgather interface to run op
const string tag_all_gather = kHcomAllGather + std::to_string(task_counter++) + kUnderline + std::to_string(0);
ret = hcom_all_gather(tag_all_gather.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
reinterpret_cast<void *>(task_info->output_data_addr()), static_cast<u64>(task_info->count()),
static_cast<hcclDataType_t>(task_info->data_type()), hccl_group.c_str(), stream);
ret = hcom_all_gather(tag_all_gather.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
hccl_group.c_str(), stream);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "hcom_all_gather fail, return ret: " << ret;
return false;
@ -78,9 +78,8 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
} else if (task_info->hccl_type() == kAllReduceOpName) {
// call hcom allreduce interface to run op
const string tag_all_reduce = kHcomAllReduce + std::to_string(task_counter++) + kUnderline + std::to_string(0);
ret = hcom_all_reduce(tag_all_reduce.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
reinterpret_cast<void *>(task_info->output_data_addr()), static_cast<u64>(task_info->count()),
static_cast<hcclDataType_t>(task_info->data_type()),
ret = hcom_all_reduce(tag_all_reduce.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
static_cast<hcclRedOp_t>(task_info->op_type()), hccl_group.c_str(), stream);
if (ret != HCCL_SUCCESS) {
MS_LOG(ERROR) << "hcom_all_reduce fail, return ret: " << ret;
@ -90,8 +89,7 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
// call hcom reducescatter interface to run op
const string tag_reduce_scatter =
kHcomReduceScatter + std::to_string(task_counter++) + kUnderline + std::to_string(0);
ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
reinterpret_cast<void *>(task_info->output_data_addr()),
ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
static_cast<hcclRedOp_t>(task_info->op_type()), hccl_group.c_str(), stream);
if (ret != HCCL_SUCCESS) {

View File

@ -40,39 +40,46 @@ bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::ve
return true;
}
void TaskGenerator::LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) {
MS_EXCEPTION_IF_NULL(anf_node_ptr);
MS_EXCEPTION_IF_NULL(kernel_inputs);
// akg process
// set atomic clean addr
if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, anf_node_ptr)) {
auto clean_output_indexs = AnfAlgo::GetNodeAttr<std::vector<size_t>>(anf_node_ptr, kAttrAtomicOutputIndexs);
auto graph = anf_node_ptr->func_graph();
MS_EXCEPTION_IF_NULL(graph);
auto manager = graph->manager();
MS_EXCEPTION_IF_NULL(manager);
auto node_users = manager->node_users();
if (node_users[anf_node_ptr].empty()) {
MS_LOG(EXCEPTION) << "Node users of " << anf_node_ptr->ToString() << " is empty.";
}
auto depend_node = node_users[anf_node_ptr].pop().first;
if (!IsPrimitiveCNode(depend_node, prim::kPrimDepend)) {
MS_LOG(EXCEPTION) << "Checking Depend node failed";
}
if (node_users[depend_node].empty()) {
MS_LOG(EXCEPTION) << "Node users of " << depend_node->ToString() << " is empty.";
}
auto post_node = node_users[depend_node].pop().first;
for (auto index : clean_output_indexs) {
auto device_address = AnfAlgo::GetOutputAddr(post_node, index);
kernel::AddressPtr input = std::make_shared<kernel::Address>();
MS_EXCEPTION_IF_NULL(input);
input->addr = device_address->ptr_;
input->size = device_address->size_;
kernel_inputs->push_back(input);
}
MS_LOG(DEBUG) << "AtomicAddClean clean output size: " << clean_output_indexs.size();
}
}
void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) {
MS_EXCEPTION_IF_NULL(anf_node_ptr);
MS_EXCEPTION_IF_NULL(kernel_inputs);
if (anf_node_ptr->inputs().size() != 2) {
// akg process
// set atomic clean addr
if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, anf_node_ptr)) {
auto clean_output_indexs = AnfAlgo::GetNodeAttr<std::vector<size_t>>(anf_node_ptr, kAttrAtomicOutputIndexs);
auto graph = anf_node_ptr->func_graph();
MS_EXCEPTION_IF_NULL(graph);
auto manager = graph->manager();
MS_EXCEPTION_IF_NULL(manager);
auto node_users = manager->node_users();
if (node_users[anf_node_ptr].empty()) {
MS_LOG(EXCEPTION) << "Node users of " << anf_node_ptr->ToString() << " is empty.";
}
auto depend_node = node_users[anf_node_ptr].pop().first;
if (!IsPrimitiveCNode(depend_node, prim::kPrimDepend)) {
MS_LOG(EXCEPTION) << "Checking Depend node failed";
}
if (node_users[depend_node].empty()) {
MS_LOG(EXCEPTION) << "Node users of " << depend_node->ToString() << " is empty.";
}
auto post_node = node_users[depend_node].pop().first;
for (auto index : clean_output_indexs) {
auto device_address = AnfAlgo::GetOutputAddr(post_node, index);
kernel::AddressPtr input = std::make_shared<kernel::Address>();
input->addr = device_address->ptr_;
MS_EXCEPTION_IF_NULL(input->addr);
input->size = device_address->size_;
kernel_inputs->push_back(input);
}
MS_LOG(DEBUG) << "AtomicAddClean clean output size: " << clean_output_indexs.size();
}
LaunchAddrCleanAkgKernel(anf_node_ptr, kernel_inputs);
return;
}
MS_EXCEPTION_IF_NULL(anf_node_ptr->inputs()[1]);

View File

@ -48,6 +48,7 @@ class TaskGenerator {
private:
static void LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs);
static void LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs);
static bool LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_id, std::vector<TaskInfoPtr> *task_info_list);
static bool LaunchAllKernel(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *task_info_list,
uint32_t graph_id);

View File

@ -79,6 +79,7 @@ void CPUKernelRuntime::AssignValueNodeAddress(session::KernelGraph *kernel_graph
std::vector<int> data_shape = tensor->shape();
size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>());
DeviceAddressPtr address = CreateDeviceAddress(nullptr, tensor_size, kOpFormat_DEFAULT, kNumberTypeFloat32);
MS_EXCEPTION_IF_NULL(address);
if (tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32) {
address->ptr_ = tensor->data_c(false);
} else {