!6907 Load input tensors in debugger before suspending execution
Merge pull request !6907 from Harshvardhan Gupta/load-input-dbg
This commit is contained in:
commit
9c79b9d712
|
@ -171,7 +171,7 @@ GraphId AscendSession::CompileGraph(NotNull<FuncGraphPtr> func_graph) {
|
|||
device::KernelAdjust::GetInstance().Profiling(NOT_NULL(root_graph.get()));
|
||||
// build kernel
|
||||
BuildKernel(root_graph);
|
||||
if (debugger_) {
|
||||
if (debugger_ && debugger_->partial_memory()) {
|
||||
debugger_->PreExecute(root_graph);
|
||||
}
|
||||
SetSummaryNodes(root_graph.get());
|
||||
|
@ -248,7 +248,7 @@ void AscendSession::BuildGraph(GraphId graph_id) {
|
|||
BuildKernel(graph);
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(ms_context);
|
||||
if (debugger_) {
|
||||
if (debugger_ && debugger_->partial_memory()) {
|
||||
debugger_->PreExecute(graph);
|
||||
}
|
||||
if (ms_context->get_param<bool>(MS_CTX_PRECOMPILE_ONLY)) {
|
||||
|
@ -312,6 +312,9 @@ void AscendSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::
|
|||
}
|
||||
// load input data from user input
|
||||
LoadInputData(kernel_graph, inputs);
|
||||
if (debugger_) {
|
||||
debugger_->PreExecute(kernel_graph);
|
||||
}
|
||||
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
|
||||
// Initialize parameter server
|
||||
InitPSParamAndOptim(kernel_graph, inputs);
|
||||
|
|
|
@ -278,9 +278,9 @@ GraphId GPUSession::CompileGraph(const AnfNodePtrList &lst, const AnfNodePtrList
|
|||
|
||||
void GPUSession::RunGraph(const GraphId &graph_id, const std::vector<tensor::TensorPtr> &inputs, VectorRef *outputs) {
|
||||
auto &kernel_graph = graphs_[graph_id];
|
||||
PreIterationDbg(kernel_graph);
|
||||
// Load input data from user input
|
||||
LoadInputData(kernel_graph, inputs);
|
||||
PreIterationDbg(kernel_graph);
|
||||
#if (ENABLE_CPU && (ENABLE_D || ENABLE_GPU))
|
||||
// Initialize parameter server
|
||||
InitPSParamAndOptim(kernel_graph, inputs);
|
||||
|
|
|
@ -22,7 +22,6 @@
|
|||
#include <utility>
|
||||
#include <memory>
|
||||
#include <map>
|
||||
|
||||
#include "backend/session/session_context.h"
|
||||
#include "backend/session/kernel_graph.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "pipeline/jit/pipeline.h"
|
||||
#include "backend/session/anf_runtime_algorithm.h"
|
||||
#include "runtime/device/kernel_runtime_manager.h"
|
||||
#include "runtime/device/kernel_runtime.h"
|
||||
|
||||
using debugger::EventReply;
|
||||
using debugger::GraphProto;
|
||||
|
@ -47,6 +48,7 @@ namespace mindspore {
|
|||
|
||||
DebuggerPtr Debugger::debugger_ = nullptr;
|
||||
std::mutex Debugger::instance_lock_;
|
||||
static const size_t PRAMATER_OUTPUT_INDEX = 0;
|
||||
|
||||
Debugger::Debugger()
|
||||
: grpc_client_(nullptr),
|
||||
|
@ -62,7 +64,26 @@ Debugger::Debugger()
|
|||
is_dataset_graph_(false),
|
||||
partial_memory_(false),
|
||||
last_overflow_bin_(0),
|
||||
overflow_bin_path_("") {}
|
||||
overflow_bin_path_("") {
|
||||
if (CheckDebuggerEnabled()) {
|
||||
// configure partial memory reuse
|
||||
partial_memory_ = CheckDebuggerPartialMemoryEnabled();
|
||||
|
||||
// switch memory reuse on or off
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
context_ptr->set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, partial_memory_);
|
||||
// print some message about memory reuse to user
|
||||
if (partial_memory_) {
|
||||
MS_LOG(WARNING)
|
||||
<< "Partial Memory Reuse is enabled. Note: 1. Please only set watchpoints before running the first "
|
||||
"step. 2. Tensor values are only available for nodes that are watched by any watchpoint.";
|
||||
} else {
|
||||
MS_LOG(INFO) << "Memory Reuse is disabled. Set environment variable MS_DEBUGGER_PARTIAL_MEM=1 to reduce memory "
|
||||
"usage for large models.";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Debugger::Init(const uint32_t device_id, const std::string device_target) {
|
||||
// access lock for public method
|
||||
|
@ -133,27 +154,6 @@ void Debugger::EnableDebugger() {
|
|||
MS_LOG(INFO) << "Environment variable MS_DEBUGGER_PORT doesn't exist. Using default debugger port: 50051";
|
||||
port = "50051";
|
||||
}
|
||||
|
||||
// configure partial memory reuse
|
||||
const char *env_partial_mem_str = std::getenv("MS_DEBUGGER_PARTIAL_MEM");
|
||||
if (env_partial_mem_str != nullptr) {
|
||||
MS_LOG(INFO) << "Getenv MS_DEBUGGER_PARTIAL_MEM: " << env_partial_mem_str;
|
||||
if (std::strcmp(env_partial_mem_str, "1") == 0) {
|
||||
partial_memory_ = true;
|
||||
}
|
||||
}
|
||||
// switch memory reuse on or off
|
||||
auto context_ptr = MsContext::GetInstance();
|
||||
MS_EXCEPTION_IF_NULL(context_ptr);
|
||||
context_ptr->set_param<bool>(MS_CTX_ENABLE_MEM_REUSE, partial_memory_);
|
||||
// print some message about memory reuse to user
|
||||
if (partial_memory_) {
|
||||
MS_LOG(WARNING) << "Partial Memory Reuse is enabled. Note: 1. Please only set watchpoints before running the first "
|
||||
"step. 2. Tensor values are only available for nodes that are watched by any watchpoint.";
|
||||
} else {
|
||||
MS_LOG(INFO) << "Memory Reuse is disabled. Set environment variable MS_DEBUGGER_PARTIAL_MEM=1 to reduce memory "
|
||||
"usage for large models.";
|
||||
}
|
||||
#ifdef ENABLE_D
|
||||
// set operation overflow info
|
||||
overflow_bin_path_ = DumpJsonParser::GetInstance().GetOpOverflowBinPath(graph_ptr_->graph_id(), device_id_);
|
||||
|
@ -195,9 +195,7 @@ void Debugger::EnableDebugger() {
|
|||
bool Debugger::CheckDebuggerDumpEnabled() {
|
||||
// see if dump is enabled
|
||||
if (device_target_ == kGPUDevice) {
|
||||
auto runtime_instance = device::KernelRuntimeManager::Instance().GetSingleKernelRuntime(kGPUDevice, device_id_);
|
||||
MS_EXCEPTION_IF_NULL(runtime_instance);
|
||||
return runtime_instance->DumpDataEnabled();
|
||||
return device::KernelRuntime::DumpDataEnabled();
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -213,6 +211,17 @@ bool Debugger::CheckDebuggerEnabled() {
|
|||
return false;
|
||||
}
|
||||
|
||||
bool Debugger::CheckDebuggerPartialMemoryEnabled() {
|
||||
const char *env_partial_mem_str = std::getenv("MS_DEBUGGER_PARTIAL_MEM");
|
||||
if (env_partial_mem_str != nullptr) {
|
||||
MS_LOG(INFO) << "Getenv MS_DEBUGGER_PARTIAL_MEM: " << env_partial_mem_str;
|
||||
if (std::strcmp(env_partial_mem_str, "1") == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Debugger::DebuggerBackendEnabled() { return CheckDebuggerDumpEnabled() || CheckDebuggerEnabled(); }
|
||||
|
||||
void Debugger::Reset() {
|
||||
|
@ -324,6 +333,7 @@ void Debugger::CheckGraphPtr(const KernelGraphPtr &graph_ptr) {
|
|||
// only try to enable debugger if it is not a dataset graph
|
||||
EnableDebugger();
|
||||
if (debugger_enabled_) {
|
||||
LoadParameters();
|
||||
// get graph proto and send to mindinsight
|
||||
SendGraphAndSuspend(GetGraphProto());
|
||||
}
|
||||
|
@ -839,4 +849,34 @@ bool Debugger::CheckPort(const char *port) {
|
|||
return true;
|
||||
}
|
||||
|
||||
void Debugger::LoadParameters() {
|
||||
if (!(debugger_enabled_ || CheckDebuggerDumpEnabled())) return;
|
||||
if (!(num_step_ == 0 || device_target_ == kAscendDevice ||
|
||||
(device_target_ == kGPUDevice && device::KernelRuntime::DumpDataEnabledIteration())))
|
||||
return;
|
||||
MS_EXCEPTION_IF_NULL(graph_ptr_);
|
||||
const auto ¶meters = graph_ptr_->inputs();
|
||||
// for parameters, set its execution order to be 0;
|
||||
int exec_order = 0;
|
||||
for (auto &item : parameters) {
|
||||
if (!item->isa<Parameter>()) {
|
||||
continue;
|
||||
}
|
||||
std::string parameter_name = item->fullname_with_scope();
|
||||
auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX);
|
||||
auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX);
|
||||
auto format = kOpFormat_DEFAULT;
|
||||
string tensor_name = parameter_name + ':' + "0";
|
||||
ShapeVector int_shapes;
|
||||
auto shape = AnfAlgo::GetOutputDeviceShape(item, PRAMATER_OUTPUT_INDEX);
|
||||
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
|
||||
[](size_t inner_item) { return SizeToInt(inner_item); });
|
||||
bool ret = addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, true);
|
||||
if (!ret) {
|
||||
MS_LOG(ERROR) << "LoadMemToHost:"
|
||||
<< ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -103,6 +103,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
|
|||
|
||||
void SendMetadata();
|
||||
|
||||
void LoadParameters();
|
||||
|
||||
private:
|
||||
// private constructor for singleton
|
||||
Debugger();
|
||||
|
@ -118,6 +120,8 @@ class Debugger : public std::enable_shared_from_this<Debugger> {
|
|||
// check if debugger enabled
|
||||
bool CheckDebuggerEnabled();
|
||||
|
||||
bool CheckDebuggerPartialMemoryEnabled();
|
||||
|
||||
// check and save graph pointer
|
||||
void CheckGraphPtr(const KernelGraphPtr &graph_ptr);
|
||||
|
||||
|
|
|
@ -663,39 +663,25 @@ bool AscendDeviceAddress::DumpMemToFile(bool trans_flag, const std::string &file
|
|||
}
|
||||
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
bool AscendDeviceAddress::LoadMemToHost(bool trans_flag, const std::string &tensor_name, int execution_order,
|
||||
bool AscendDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order,
|
||||
const std::string &host_fmt, const ShapeVector &host_shape, TypeId host_type,
|
||||
size_t slot, Debugger *debugger, bool keep_prev) const {
|
||||
size_t slot, bool keep_prev) const {
|
||||
bool ret = false;
|
||||
DebugServices *debug_services = debugger->debug_services();
|
||||
MS_EXCEPTION_IF_NULL(debug_services);
|
||||
TensorLoader *tensor_loader = debug_services->tensor_loader();
|
||||
TensorLoader *tensor_loader = Debugger::GetInstance()->debug_services()->tensor_loader();
|
||||
MS_EXCEPTION_IF_NULL(tensor_loader);
|
||||
// TensorData is freed up in AscendSession class
|
||||
auto tensor_data = std::make_shared<mindspore::TensorData>();
|
||||
tensor_data->SetName(tensor_name);
|
||||
tensor_data->SetExecutionOrder(execution_order);
|
||||
tensor_data->SetSlot(slot);
|
||||
if (trans_flag) {
|
||||
MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
|
||||
mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(host_type, host_shape);
|
||||
size_t host_size = out_tensor->data().nbytes();
|
||||
ret = SyncDeviceToHost(host_shape, host_size, host_type, out_tensor->data_c());
|
||||
if (!ret) {
|
||||
MS_LOG(ERROR) << "Copy device mem to host failed";
|
||||
return ret;
|
||||
}
|
||||
tensor_data->SetTensor(out_tensor);
|
||||
} else {
|
||||
mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(type_id_, host_shape);
|
||||
size_t host_size = out_tensor->data().nbytes();
|
||||
auto ret_rt_memcpy = rtMemcpy(out_tensor->data_c(), host_size, ptr_, host_size, RT_MEMCPY_DEVICE_TO_HOST);
|
||||
if (ret_rt_memcpy != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]";
|
||||
}
|
||||
MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
|
||||
tensor_data->SetTensor(out_tensor);
|
||||
mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(type_id_, host_shape);
|
||||
size_t host_size = out_tensor->data().nbytes();
|
||||
auto ret_rt_memcpy = rtMemcpy(out_tensor->data_c(), host_size, ptr_, host_size, RT_MEMCPY_DEVICE_TO_HOST);
|
||||
if (ret_rt_memcpy != RT_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "SyncDeviceToHost: rtMemcpy mem size[" << size_ << "] fail, ret[" << ret_rt_memcpy << "]";
|
||||
}
|
||||
MS_LOG(INFO) << "E2E tensor name is " << tensor_name;
|
||||
tensor_data->SetTensor(out_tensor);
|
||||
ret = tensor_loader->LoadNewTensor(tensor_data, keep_prev);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -45,9 +45,8 @@ class AscendDeviceAddress : public DeviceAddress {
|
|||
bool DumpMemToFile(bool dump_mode, const std::string &filepath, const std::string &host_fmt,
|
||||
const ShapeVector &host_shape, TypeId host_type) const override;
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
bool LoadMemToHost(bool dump_mode, const std::string &tensor_name, int execution_order, const std::string &host_fmt,
|
||||
const ShapeVector &host_shape, TypeId host_type, size_t slot, Debugger *debugger,
|
||||
bool keep_prev) const;
|
||||
bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
|
||||
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const override;
|
||||
#endif
|
||||
|
||||
private:
|
||||
|
|
|
@ -254,15 +254,10 @@ void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) {
|
|||
auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr);
|
||||
MS_EXCEPTION_IF_NULL(ascend_addr);
|
||||
ShapeVector int_shapes;
|
||||
if (trans_flag) {
|
||||
int_shapes = trans::GetRuntimePaddingShape(node, j);
|
||||
} else {
|
||||
auto shape = AnfAlgo::GetOutputDeviceShape(node, j);
|
||||
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
|
||||
[](size_t inner_item) { return SizeToInt(inner_item); });
|
||||
}
|
||||
auto ret =
|
||||
ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, j, debugger, false);
|
||||
auto shape = AnfAlgo::GetOutputDeviceShape(node, j);
|
||||
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
|
||||
[](size_t inner_item) { return SizeToInt(inner_item); });
|
||||
auto ret = ascend_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false);
|
||||
if (!ret) {
|
||||
MS_LOG(ERROR) << "LoadMemToHost: flag:" << trans_flag << ", tensor_name:" << tensor_name
|
||||
<< ", host_format:" << format << ".!";
|
||||
|
@ -272,40 +267,6 @@ void LoadOutput(mindspore::session::KernelGraph *graph, Debugger *debugger) {
|
|||
}
|
||||
}
|
||||
|
||||
// Loads the device value of every Parameter input of `graph` to the host through
// AscendDeviceAddress::LoadMemToHost, handing `debugger` to that call.
// Throws (via MS_EXCEPTION_IF_NULL) when `graph` is null or when a parameter's device
// address is not an AscendDeviceAddress. A failed load is logged and the loop continues.
void LoadParameters(mindspore::session::KernelGraph *graph, Debugger *debugger) {
  MS_EXCEPTION_IF_NULL(graph);
  // trans_flag: "true" means tensor values will be transferred to host format, otherwise not.
  bool trans_flag = false;
  const auto &parameters = graph->inputs();
  // for parameters, set its execution order to be 0;
  int exec_order = 0;
  for (auto &item : parameters) {
    // graph inputs that are not Parameter nodes are skipped
    if (!item->isa<Parameter>()) {
      continue;
    }
    std::string parameter_name = item->fullname_with_scope();
    auto addr = AnfAlgo::GetOutputAddr(item, PRAMATER_OUTPUT_INDEX);
    auto type = AnfAlgo::GetOutputInferDataType(item, PRAMATER_OUTPUT_INDEX);
    auto format = kOpFormat_DEFAULT;
    // tensor names use the "<node name>:<output slot>" form; parameters only use slot 0
    std::string tensor_name = parameter_name + ':' + "0";
    auto ascend_addr = dynamic_cast<const mindspore::device::ascend::AscendDeviceAddress *>(addr);
    MS_EXCEPTION_IF_NULL(ascend_addr);
    ShapeVector int_shapes;
    if (trans_flag) {
      int_shapes = trans::GetRuntimePaddingShape(item, PRAMATER_OUTPUT_INDEX);
    } else {
      auto shape = AnfAlgo::GetOutputDeviceShape(item, PRAMATER_OUTPUT_INDEX);
      (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                           [](size_t inner_item) { return SizeToInt(inner_item); });
    }
    // slot 0, keep_prev = true
    auto ret =
      ascend_addr->LoadMemToHost(trans_flag, tensor_name, exec_order, format, int_shapes, type, 0, debugger, true);
    if (!ret) {
      MS_LOG(ERROR) << "LoadMemToHost Failed: flag:" << trans_flag << ", path:" << tensor_name
                    << ", host_format:" << format << ".!";
    }
  }
}
|
||||
} // namespace
|
||||
#endif
|
||||
|
||||
|
@ -319,7 +280,7 @@ bool AscendKernelRuntime::LoadData(mindspore::session::KernelGraph *graph, Debug
|
|||
// load output
|
||||
LoadOutput(graph, debugger);
|
||||
// load parameters
|
||||
LoadParameters(graph, debugger);
|
||||
if (debugger) debugger->LoadParameters();
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -70,6 +70,12 @@ class DeviceAddress : public mindspore::DeviceSync {
|
|||
const ShapeVector &host_shape, TypeId host_type) const {
|
||||
return true;
|
||||
}
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
virtual bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
|
||||
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const {
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
protected:
|
||||
const void *ptr() const { return ptr_; }
|
||||
|
|
|
@ -80,14 +80,14 @@ GPUDeviceAddress::~GPUDeviceAddress() {
|
|||
}
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
bool GPUDeviceAddress::LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
|
||||
const ShapeVector &host_shape, TypeId host_type, size_t slot, Debugger *debugger,
|
||||
const ShapeVector &host_shape, TypeId host_type, size_t slot,
|
||||
bool keep_prev) const {
|
||||
bool ret = false;
|
||||
if (size_ == 0) {
|
||||
return true;
|
||||
}
|
||||
DebugServices *debug_services = debugger->debug_services();
|
||||
TensorLoader *tensor_loader = debug_services->tensor_loader();
|
||||
|
||||
TensorLoader *tensor_loader = Debugger::GetInstance()->debug_services()->tensor_loader();
|
||||
|
||||
mindspore::tensor::TensorPtr out_tensor = std::make_shared<tensor::Tensor>(type_id_, host_shape);
|
||||
size_t host_size = out_tensor->data().nbytes();
|
||||
|
|
|
@ -44,8 +44,7 @@ class GPUDeviceAddress : public DeviceAddress {
|
|||
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
bool LoadMemToHost(const std::string &tensor_name, int execution_order, const std::string &host_fmt,
|
||||
const ShapeVector &host_shape, TypeId host_type, size_t slot, Debugger *debugger,
|
||||
bool keep_prev) const;
|
||||
const ShapeVector &host_shape, TypeId host_type, size_t slot, bool keep_prev) const override;
|
||||
#endif
|
||||
private:
|
||||
DeviceAddressStatus status_{DeviceAddressStatus::kInDevice};
|
||||
|
|
|
@ -111,7 +111,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
|
|||
auto shape = AnfAlgo::GetOutputDeviceShape(input_kernel, PARAMETER_OUTPUT_INDEX);
|
||||
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
|
||||
[](size_t inner_item) { return SizeToInt(inner_item); });
|
||||
auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, debugger, true);
|
||||
auto ret = gpu_addr->LoadMemToHost(input_tensor_name, exec_order, format, int_shapes, type, 0, true);
|
||||
if (!ret) {
|
||||
MS_LOG(ERROR) << "LoadMemToHost:"
|
||||
<< ", tensor_name:" << input_tensor_name << ", host_format:" << format << ".!";
|
||||
|
@ -130,7 +130,7 @@ void LoadKernelData(Debugger *debugger, const CNodePtr &kernel,
|
|||
auto shape = AnfAlgo::GetOutputDeviceShape(kernel, j);
|
||||
(void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
|
||||
[](size_t inner_item) { return SizeToInt(inner_item); });
|
||||
auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, debugger, false);
|
||||
auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, j, false);
|
||||
if (!ret) {
|
||||
MS_LOG(ERROR) << "LoadMemToHost:"
|
||||
<< ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
|
||||
|
@ -148,36 +148,6 @@ void UpdateStepNum(Debugger *debugger, bool dump_enabled) {
|
|||
}
|
||||
}
|
||||
|
||||
// Loads the device value of every Parameter input of `graph` to the host through
// GPUDeviceAddress::LoadMemToHost, handing `debugger` to that call.
// Does nothing unless `debugger` is non-null and `dump_enabled` is true.
// Throws (via MS_EXCEPTION_IF_NULL) when `graph` is null or when a parameter's device
// address is not a GPUDeviceAddress. A failed load is logged and the loop continues.
void LoadParameters(const session::KernelGraph *graph, Debugger *debugger, bool dump_enabled) {
  MS_EXCEPTION_IF_NULL(graph);
  if (!(debugger && dump_enabled)) {
    return;
  }
  const auto &parameters = graph->inputs();
  // for parameters, set its execution order to be 0;
  int exec_order = 0;
  for (auto &item : parameters) {
    // graph inputs that are not Parameter nodes are skipped
    if (!item->isa<Parameter>()) {
      continue;
    }
    std::string parameter_name = item->fullname_with_scope();
    auto addr = AnfAlgo::GetOutputAddr(item, PARAMETER_OUTPUT_INDEX);
    auto type = AnfAlgo::GetOutputInferDataType(item, PARAMETER_OUTPUT_INDEX);
    auto format = kOpFormat_DEFAULT;
    // tensor names use the "<node name>:<output slot>" form; parameters only use slot 0
    std::string tensor_name = parameter_name + ':' + "0";
    auto gpu_addr = dynamic_cast<const mindspore::device::gpu::GPUDeviceAddress *>(addr);
    // dynamic_cast yields nullptr when addr is not a GPUDeviceAddress; raise instead of dereferencing it
    MS_EXCEPTION_IF_NULL(gpu_addr);
    ShapeVector int_shapes;
    auto shape = AnfAlgo::GetOutputDeviceShape(item, PARAMETER_OUTPUT_INDEX);
    (void)std::transform(shape.begin(), shape.end(), std::back_inserter(int_shapes),
                         [](size_t inner_item) { return SizeToInt(inner_item); });
    // slot 0, keep_prev = true
    auto ret = gpu_addr->LoadMemToHost(tensor_name, exec_order, format, int_shapes, type, 0, debugger, true);
    if (!ret) {
      MS_LOG(ERROR) << "LoadMemToHost:"
                    << ", tensor_name:" << tensor_name << ", host_format:" << format << ".!";
    }
  }
}
|
||||
|
||||
void ClearCurrentData(Debugger *debugger, bool dump_enabled) {
|
||||
if (debugger && (debugger->debugger_enabled() || dump_enabled)) {
|
||||
DebugServices *debug_services = debugger->debug_services();
|
||||
|
@ -601,7 +571,7 @@ bool GPUKernelRuntime::LaunchKernelDynamic(const session::KernelGraph *graph, De
|
|||
}
|
||||
if (!mock) {
|
||||
// collect weights and bias for dump mode
|
||||
LoadParameters(graph, debugger, dump_enabled);
|
||||
if (debugger) debugger->LoadParameters();
|
||||
CHECK_OP_RET_WITH_EXCEPT(SyncStream(), "SyncStream failed.");
|
||||
}
|
||||
ClearSwapInfo(mock);
|
||||
|
|
|
@ -53,8 +53,8 @@ class KernelRuntime {
|
|||
void RunOpAssignMemory(const ValuePtr &pre_output_value, const std::vector<tensor::TensorPtr> &input_tensors,
|
||||
session::KernelGraph *graph);
|
||||
void RunOpClearMemory(const session::KernelGraph *graph);
|
||||
bool DumpDataEnabled();
|
||||
bool DumpDataEnabledIteration();
|
||||
static bool DumpDataEnabled();
|
||||
static bool DumpDataEnabledIteration();
|
||||
virtual bool LoadData(session::KernelGraph *graph, Debugger *debugger);
|
||||
virtual bool Load(session::KernelGraph *graph, bool is_task_sink);
|
||||
virtual bool Run(session::KernelGraph *graph, bool is_task_sink, Debugger *debugger = nullptr) = 0;
|
||||
|
|
Loading…
Reference in New Issue