forked from mindspore-Ecosystem/mindspore

Code Warning Clean

parent d24f4e65e9, commit dc19a515cd
@@ -167,7 +167,7 @@ void MPICollective::AssignLocalRankSize(const std::string &name, const std::vect
   CHECK_RET(
     MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, all_host_hashs.data(), sizeof(size_t), MPI_BYTE, mpi_group_comm),
     MPI_SUCCESS, "MPI_Allgather host hash failed.");
-  int local_rank_size = std::count(all_host_hashs.begin(), all_host_hashs.end(), host_hash);
+  int local_rank_size = static_cast<int>(std::count(all_host_hashs.begin(), all_host_hashs.end(), host_hash));
   std::get<local_rank_size_index>(group_info_[name]) = local_rank_size;
   std::vector<int> group_world_ranks(group_ranks.begin(), group_ranks.end());
   world_map_[name] = group_world_ranks;
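The first hunk sets the pattern for most of this commit: std::count returns the iterator's difference_type (normally ptrdiff_t, 64-bit on LP64 targets), so storing the result straight into an int is an implicit narrowing that conversion warnings flag. A minimal standalone sketch of the fix, with hypothetical data:

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<std::size_t> all_host_hashs = {42, 7, 42, 42};  // hypothetical host hashes
  std::size_t host_hash = 42;
  // std::count yields ptrdiff_t; the explicit cast documents that the count
  // is known to fit in int instead of narrowing silently.
  int local_rank_size = static_cast<int>(std::count(all_host_hashs.begin(), all_host_hashs.end(), host_hash));
  std::cout << local_rank_size << '\n';  // prints 3
  return 0;
}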
@@ -18,7 +18,6 @@
 #define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_
 
 #include <memory>
 #include <vector>
 #include <map>
 #include <mutex>
 #include "plugin/device/ascend/hal/device/ge_runtime/task/task.h"
@@ -109,7 +109,7 @@ bool CanConvertDefaultShapeToNZ(const ShapeVector &shape) {
     if (i > 2) {
       break;
     }
-    if (SizeToLong(shape[shape.size() - i]) != 1 && SizeToLong(shape[shape.size() - i] % kCubeSize != 0)) {
+    if (SizeToInt(shape[shape.size() - i]) != 1 && SizeToInt(shape[shape.size() - i] % kCubeSize != 0)) {
       return false;
     }
   }
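SizeToLong and SizeToInt are the project's conversion helpers; the change narrows to the width the surrounding comparison actually uses. As a rough stand-in for what such a checked helper does (a sketch, not the project's exact code):

#include <climits>
#include <cstddef>
#include <stdexcept>

// Hypothetical checked narrowing in the spirit of SizeToInt: fail loudly
// instead of truncating when the value does not fit.
inline int CheckedSizeToInt(std::size_t v) {
  if (v > static_cast<std::size_t>(INT_MAX)) {
    throw std::out_of_range("size_t value does not fit in int");
  }
  return static_cast<int>(v);
}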
@@ -32,7 +32,7 @@ namespace device {
 namespace ascend {
 class AscendDeviceResManager : public DeviceResManager {
  public:
-  AscendDeviceResManager() : mem_manager_(nullptr) {}
+  AscendDeviceResManager() : compute_stream_(nullptr), communication_stream_(nullptr), mem_manager_(nullptr) {}
   ~AscendDeviceResManager() override = default;
 
   void Initialize() override;
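Initializing every pointer member in the constructor's init list clears "member not initialized" warnings and removes any window where the streams could be read as indeterminate. A compressed illustration with a hypothetical class:

// All raw-pointer members get an explicit nullptr in the init list, so a
// default-constructed object is in a known state before Initialize() runs.
class ResManagerSketch {
 public:
  ResManagerSketch() : compute_stream_(nullptr), communication_stream_(nullptr), mem_manager_(nullptr) {}

 private:
  void *compute_stream_;
  void *communication_stream_;
  void *mem_manager_;
};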
@@ -54,17 +54,17 @@ class AscendDeviceResManager : public DeviceResManager {
 
   // Create concrete device address according different device type.
   DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format, TypeId type_id,
-                                       const ShapeVector &shape = ShapeVector()) const override;
+                                       const ShapeVector &shape) const override;
 
   // Synchronize stream, device such as GPU and Ascend need stream to launch kernel asynchronously,
   // using 'SyncStream' to block thread and wait for completing all tasks in stream.
   // Devices that do not need stream could ignore the implementation of this function.
-  bool SyncStream(size_t stream_id = 0) const override;
+  bool SyncStream(size_t stream_id) const override;
 
-  // Really create an ascend stream.
-  bool CreateStream(void **stream) const;
-
  protected:
+  // Really create an ascend stream.
+  bool CreateStream(void **stream) const override;
+
   // Really destroy an ascend stream.
   bool DestroyStream(void *stream) const override;
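Two related cleanups here: default arguments come off the virtual overrides, and CreateStream moves from the public surface to a protected override. Defaults on virtuals are a classic lint target because the default binds to the static type at the call site while dispatch is dynamic; a small runnable sketch of the pitfall (hypothetical types):

#include <iostream>

struct Base {
  virtual ~Base() = default;
  // Default argument on a virtual: the caller's static type picks the
  // default, the dynamic type picks the body -- the mismatch lint warns about.
  virtual void Sync(int stream_id = 0) const { std::cout << "Base " << stream_id << '\n'; }
};

struct Derived : Base {
  void Sync(int stream_id) const override { std::cout << "Derived " << stream_id << '\n'; }
};

int main() {
  Derived d;
  const Base &b = d;
  b.Sync();  // prints "Derived 0": Base supplies the default, Derived runs
  return 0;
}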
@@ -92,7 +92,8 @@ void UnfoldRecursiveExecOrder(KernelGraph *kernel_graph) {
       }
       auto label_id = common::AnfAlgo::GetNodeAttr<uint32_t>(kernel_cnodes[i], kAttrLabelIndex);
       std::vector<CNodePtr> back;
-      auto front = HandleRecursiveCall(kernel_cnodes, label_id, &i, &back);
+      auto index = i;
+      auto front = HandleRecursiveCall(kernel_cnodes, label_id, &index, &back);
       mem_reuse_order.insert(mem_reuse_order.end(), front.begin(), front.end());
       mem_reuse_order.insert(mem_reuse_order.end(), back.begin(), back.end());
     }
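Handing out the address of the loop counter lets the callee move the induction variable under the analyzer's feet, which is what the warning objects to; copying into index first keeps the loop's bounds reasoning local. A reduced sketch with hypothetical names:

#include <cstddef>
#include <vector>

// Hypothetical callee that may advance the cursor it is handed.
void HandleRange(const std::vector<int> &items, std::size_t *cursor) {
  if (*cursor < items.size()) {
    ++*cursor;
  }
}

void Walk(const std::vector<int> &items) {
  for (std::size_t i = 0; i < items.size(); ++i) {
    std::size_t index = i;  // copy first: the counter itself never escapes
    HandleRange(items, &index);
  }
}

Note the copy is only equivalent where the callee's cursor updates are advisory; if they were meant to steer the loop, the rewrite changes behavior.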
@@ -49,7 +49,7 @@ class AscendGraphExecutor : public GraphExecutor {
   // compile graph interface
   void UpdateExecOrder(const KernelGraphPtr &graph) const;
   void AllocateGraphMemory(const NotNull<KernelGraphPtr> &root_graph) const;
-  void AssignInputMemory(const NotNull<KernelGraphPtr> &graph, NotNull<std::set<KernelGraphPtr> *> memo) const;
+  void AssignInputMemory(const NotNull<KernelGraphPtr> &graph, NotNull<std::set<KernelGraphPtr> *> const memo) const;
   void LoadModel(const NotNull<KernelGraphPtr> &root_graph) const;
 
   // LaunchGraph interface
@@ -302,7 +302,7 @@ bool AscendKernelExecutor::PySyncRuning() const {
   auto ms_context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(ms_context);
   if ((ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) &&
-      ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE) && !res_manager_->SyncStream()) {
+      ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE) && !res_manager_->SyncStream(0)) {
     return false;
   }
   return true;
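This call site pairs with the header change above: with the = 0 default removed from SyncStream, callers now pass the stream id explicitly.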
@@ -414,12 +414,12 @@ bool AscendKernelExecutor::LaunchKernel(const CNodePtr &kernel, const vector<Add
   auto profiler_inst = profiler::ascend::PynativeProfiler::GetInstance();
   MS_EXCEPTION_IF_NULL(profiler_inst);
   std::thread::id t_id = std::this_thread::get_id();
-  (void)profiler_inst->OpDataProducerBegin(res_manager_->runtime_instance_, stream, t_id,
-                                           kernel->fullname_with_scope(), is_dynamic_shape);
+  profiler_inst->OpDataProducerBegin(res_manager_->runtime_instance_, stream, t_id, kernel->fullname_with_scope(),
+                                     is_dynamic_shape);
 #endif
   ret = kernel_mod->Launch(real_inputs, workspace, outputs, stream);
 #ifndef ENABLE_SECURITY
-  (void)profiler_inst->OpDataProducerEnd(t_id, is_dynamic_shape);
+  profiler_inst->OpDataProducerEnd(t_id, is_dynamic_shape);
 #endif
   if (!ret) {
     MS_LOG(ERROR) << "Launch kernel failed, kernel full name: " << kernel->fullname_with_scope();
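Dropping (void) is the inverse of the usual discard idiom: the cast only earns its keep when the callee's result could otherwise trip an unused-result warning, and lint flags it as redundant on a void-returning call. A tiny sketch:

// The (void) cast marks a deliberate discard; on void-returning calls it is
// noise, which is presumably why this commit removes it here.
[[nodiscard]] int Compute() { return 42; }
void Log() {}

int main() {
  (void)Compute();  // keep: suppresses the [[nodiscard]] diagnostic on purpose
  Log();            // no cast: "(void)Log();" would be the redundancy lint flags
  return 0;
}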
@@ -433,7 +433,7 @@ bool AscendKernelExecutor::LaunchKernel(const CNodePtr &kernel, const vector<Add
   if ((profiler_manage_instance->GetNetDynamicShapeStatus() ||
        ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) &&
       ascend_instance->GetEnableFlag()) {
-    ascend_instance->GetNodeTaskIdStreamId(kernel, graph_id, device_id, kernel_type);
+    ascend_instance->GetNodeTaskIdStreamId(kernel, graph_id, UintToInt(device_id), kernel_type);
   }
 
   return PySyncRuning();
@@ -315,7 +315,7 @@ void GeGraphExecutor::AllocOutputHostMemory(const KernelGraphPtr &kernel_graph)
   }
 }
 
-bool GeGraphExecutor::CompileGraph(const FuncGraphPtr &graph, const std::map<string, string> &) {
+bool GeGraphExecutor::CompileGraph(const FuncGraphPtr &graph, const std::map<string, string> & /* compile_options */) {
   MS_EXCEPTION_IF_NULL(graph);
   KernelGraphPtr kg = std::dynamic_pointer_cast<session::KernelGraph>(graph);
   MS_EXCEPTION_IF_NULL(kg);
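Naming an intentionally unused parameter in a comment keeps the signature self-documenting without triggering unused-parameter warnings. Stand-alone version with hypothetical names:

#include <map>
#include <string>

// The commented-out name is visible to readers but not to the compiler, so
// -Wunused-parameter stays quiet while the intent stays documented.
bool CompileGraphSketch(const std::string &graph_name,
                        const std::map<std::string, std::string> & /* compile_options */) {
  return !graph_name.empty();
}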
@@ -334,14 +334,15 @@ bool GeGraphExecutor::CompileGraph(const FuncGraphPtr &graph, const std::map<str
   return true;
 }
 
-bool GeGraphExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &,
-                               std::vector<tensor::Tensor> *, const std::map<string, string> &) {
+bool GeGraphExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &inputs,
+                               std::vector<tensor::Tensor> *outputs,
+                               const std::map<string, string> & /* compile_options */) {
   MS_EXCEPTION_IF_NULL(graph);
   MS_LOG(INFO) << "GE run graph " << graph->ToString() << " start.";
   // copy input from device to host
-  const auto &inputs = graph->get_inputs();
+  const auto &cur_inputs = graph->get_inputs();
   std::vector<tensor::TensorPtr> input_tensors;
-  for (const auto &input : inputs) {
+  for (const auto &input : cur_inputs) {
     MS_EXCEPTION_IF_NULL(input);
     auto output_addr = AnfAlgo::GetMutableOutputAddr(input, 0);
     auto shapes = trans::GetRuntimePaddingShape(input, 0);
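Giving the parameters real names (inputs, outputs) forces the rename of the local inputs to cur_inputs: otherwise the local shadows the parameter and -Wshadow-style checks fire. Reduced to its essentials (hypothetical types):

#include <cstddef>
#include <string>
#include <vector>

std::size_t RunSketch(const std::vector<std::string> &inputs) {
  // Previously this local was also named "inputs", which would shadow the
  // now-named parameter; the rename resolves the ambiguity.
  const std::vector<std::string> &cur_inputs = inputs;
  std::size_t total = 0;
  for (const auto &input : cur_inputs) {
    total += input.size();
  }
  return total;
}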
@@ -384,30 +385,31 @@ bool GeGraphExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<tens
                       << ge_outputs.size();
   }
   // copy output from host to device
-  auto outputs = common::AnfAlgo::GetAllOutputWithIndex(graph->output());
-  if (outputs.size() != ge_outputs.size()) {
-    MS_LOG(EXCEPTION) << "Invalid output size, graph's size " << outputs.size() << " tensor size " << ge_outputs.size();
+  auto graph_outputs = common::AnfAlgo::GetAllOutputWithIndex(graph->output());
+  if (graph_outputs.size() != ge_outputs.size()) {
+    MS_LOG(EXCEPTION) << "Invalid output size, graph's size " << graph_outputs.size() << " tensor size "
+                      << ge_outputs.size();
   }
 
   std::vector<ShapeVector> output_shapes;
-  for (size_t i = 0; i < outputs.size(); ++i) {
-    const auto &[output_node, idx] = outputs[i];
+  for (size_t i = 0; i < graph_outputs.size(); ++i) {
+    const auto &[output_node, idx] = graph_outputs[i];
     const auto &tensor = ge_outputs[i];
     auto output_addr = AnfAlgo::GetMutableOutputAddr(output_node, idx);
     output_addr->set_ptr(device_context_->device_res_manager_->AllocateMemory(tensor->GetSize()));
     output_addr->SetSize(tensor->GetSize());
     output_addr->set_is_ptr_persisted(false);
 
-    if (output_addr->GetSize() < LongToSize(tensor->GetSize())) {
+    if (output_addr->GetSize() < LongToSize(UlongToLong(tensor->GetSize()))) {
       MS_LOG(EXCEPTION) << "Output node " << output_node->DebugString() << "'s mem size " << output_addr->GetSize()
                         << " is less than actual output size " << tensor->GetSize();
     }
     // memcpy_s does not support data that more than 2GB
-    (void)memcpy(output_addr->GetMutablePtr(), tensor->GetData(), tensor->GetSize());
+    (void)memcpy(reinterpret_cast<uint8_t *>(output_addr->GetMutablePtr()), tensor->GetData(), tensor->GetSize());
     auto actual_shapes = tensor->GetTensorDesc().GetShape().GetDims();
     output_shapes.emplace_back(std::move(actual_shapes));
   }
-  UpdateOutputNodeShape(outputs, me_types, output_shapes);
+  UpdateOutputNodeShape(graph_outputs, me_types, output_shapes);
   MS_LOG(INFO) << "GE run graph end.";
   return true;
 }
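Same themes at larger scale: the outputs local becomes graph_outputs to stay clear of the new outputs parameter, the size guard goes through the project's checked-conversion helpers (UlongToLong feeding LongToSize, the same family as SizeToInt and UintToInt above), and the memcpy destination gets an explicit reinterpret_cast so the byte-level copy is stated rather than implied.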
@@ -476,7 +478,7 @@ bool GeDeviceContext::InitGe(const std::shared_ptr<MsContext> &inst_context) {
     return true;
   }
 
-  if (inst_context->get_param<uint32_t>(MS_CTX_GE_REF)) {
+  if (static_cast<bool>(inst_context->get_param<uint32_t>(MS_CTX_GE_REF))) {
     inst_context->increase_param<uint32_t>(MS_CTX_GE_REF);
     return true;
   }
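Integers used directly as conditions are implicit conversions that stricter warning sets flag; static_cast<bool> states the intent. Minimal form:

#include <cstdint>
#include <iostream>

int main() {
  uint32_t ge_ref_count = 2;  // hypothetical reference count
  // Explicit cast instead of relying on the implicit uint32_t -> bool test.
  if (static_cast<bool>(ge_ref_count)) {
    std::cout << "GE already initialized, bump the refcount\n";
  }
  return 0;
}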
@@ -506,7 +508,7 @@ void GeDeviceContext::GetGeOptions(const std::shared_ptr<MsContext> &ms_context_
   if (!dump_env.empty()) {
     auto &dump_parser = DumpJsonParser::GetInstance();
     dump_parser.Parse();
-    (*ge_options)["ge.exec.enableDump"] = std::to_string(dump_parser.async_dump_enabled());
+    (*ge_options)["ge.exec.enableDump"] = std::to_string(static_cast<int>(dump_parser.async_dump_enabled()));
     (*ge_options)["ge.exec.dumpPath"] = dump_parser.path();
     // Parse() make sure that input_output is less than 3.
     (*ge_options)["ge.exec.dumpMode"] = kGeDumpMode[dump_parser.input_output()];
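std::to_string has no bool overload, so the flag would promote to int implicitly; casting first makes the bool-to-"1"/"0" mapping explicit. Minimal form:

#include <iostream>
#include <string>

int main() {
  bool async_dump_enabled = true;  // hypothetical flag
  // bool would promote to int implicitly inside std::to_string; the cast
  // spells out the conversion the code relies on.
  std::string value = std::to_string(static_cast<int>(async_dump_enabled));
  std::cout << value << '\n';  // prints 1
  return 0;
}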
@@ -519,10 +521,8 @@ void GeDeviceContext::GetGeOptions(const std::shared_ptr<MsContext> &ms_context_
               << ", dump step is " << dump_parser.iteration_string() << ".";
   }
   auto profiler_manager = profiler::ProfilerManager::GetInstance();
-  if (profiler_manager == nullptr) {
-    MS_LOG(EXCEPTION) << "Profiler manager is nullptr";
-  }
-  (*ge_options)["ge.exec.profilingMode"] = std::to_string(profiler_manager->GetProfilingEnableFlag());
+  MS_EXCEPTION_IF_NULL(profiler_manager);
+  (*ge_options)["ge.exec.profilingMode"] = std::to_string(static_cast<int>(profiler_manager->GetProfilingEnableFlag()));
   if (profiler_manager->GetProfilingEnableFlag()) {
     (*ge_options)["ge.exec.profilingOptions"] = profiler_manager->GetProfilingOptions();
   }
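Folding the hand-rolled null check into MS_EXCEPTION_IF_NULL matches how every other guard in these files is written, and the static_cast<int> mirrors the bool-to-string fix in the previous hunk.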
@@ -616,13 +616,13 @@ void GeDeviceContext::SetDisableReuseMemoryFlag(std::map<std::string, std::strin
   }
 }
 
-void GeDeviceContext::SetHcclOptions(const std::shared_ptr<MsContext> &ms_context_ptr,
+void GeDeviceContext::SetHcclOptions(const std::shared_ptr<MsContext> &inst_context,
                                      std::map<std::string, std::string> *ge_options) {
-  MS_EXCEPTION_IF_NULL(ms_context_ptr);
+  MS_EXCEPTION_IF_NULL(inst_context);
   MS_EXCEPTION_IF_NULL(ge_options);
   auto env_table_file = common::GetEnv("RANK_TABLE_FILE");
   auto env_rank_id = common::GetEnv("RANK_ID");
-  auto env_device_id = std::to_string(ms_context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID));
+  auto env_device_id = std::to_string(inst_context->get_param<uint32_t>(MS_CTX_DEVICE_ID));
   if (!(env_table_file.empty() || env_rank_id.empty())) {
     MS_LOG(INFO) << "Initialize Ge for distribute parameter";
     MS_LOG(INFO) << "Use hccl, make sure hccl lib is set in OPTION_EXEC_EXTERN_PLUGIN_PATH.";
@@ -652,14 +652,14 @@ void GeDeviceContext::SetHcclOptions(const std::shared_ptr<MsContext> &ms_contex
   }
 }
 
-bool GeDeviceContext::FinalizeGe(const std::shared_ptr<MsContext> &ms_context_ptr) {
-  MS_EXCEPTION_IF_NULL(ms_context_ptr);
-  if (ms_context_ptr->get_param<uint32_t>(MS_CTX_GE_REF) == 0) {
+bool GeDeviceContext::FinalizeGe(const std::shared_ptr<MsContext> &inst_context) {
+  MS_EXCEPTION_IF_NULL(inst_context);
+  if (inst_context->get_param<uint32_t>(MS_CTX_GE_REF) == 0) {
     return true;
   }
-  ms_context_ptr->decrease_param<uint32_t>(MS_CTX_GE_REF);
-  if (ms_context_ptr->get_param<uint32_t>(MS_CTX_GE_REF) == 0) {
-    ms_context_ptr->set_param<uint32_t>(MS_CTX_GE_REF, 0);
+  inst_context->decrease_param<uint32_t>(MS_CTX_GE_REF);
+  if (inst_context->get_param<uint32_t>(MS_CTX_GE_REF) == 0) {
+    inst_context->set_param<uint32_t>(MS_CTX_GE_REF, 0);
     try {
       transform::ClearGeSessionAndRunner();
     } catch (const std::exception &e) {
@@ -671,10 +671,10 @@ bool GeDeviceContext::FinalizeGe(const std::shared_ptr<MsContext> &ms_context_pt
     if (ge::GEFinalize() != ge::GRAPH_SUCCESS) {
       MS_LOG(WARNING) << "Finalize GE failed!";
     }
-    ms_context_ptr->set_param<bool>(MS_CTX_IS_PYNATIVE_GE_INIT, false);
+    inst_context->set_param<bool>(MS_CTX_IS_PYNATIVE_GE_INIT, false);
   } else {
     MS_LOG(INFO) << "Ge is used, no need to finalize, tsd reference = "
-                 << ms_context_ptr->get_param<uint32_t>(MS_CTX_GE_REF) << ".";
+                 << inst_context->get_param<uint32_t>(MS_CTX_GE_REF) << ".";
   }
   return true;
 }
@@ -41,11 +41,10 @@ class GeDeviceResManager : public DeviceResManager {
   std::vector<void *> AllocateContinuousMemory(const std::vector<size_t> &size_list) const override;
 
   DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format, TypeId type_id,
-                                       const ShapeVector &shape = ShapeVector()) const override;
+                                       const ShapeVector &shape) const override;
 
   static void CreateSessionAndGraphRunner(bool is_training);
 
  protected:
   // Relevant function to allocate and free device memory of raw ptr.
   void *AllocateMemory(size_t size) const override;
   void FreeMemory(void *ptr) const override;
@@ -91,7 +91,7 @@ using DeviceContextPtr = std::shared_ptr<DeviceContext>;
 
 class DeviceResManager {
  public:
-  DeviceResManager() : collective_comm_lib_(nullptr) {}
+  DeviceResManager() : collective_comm_lib_(nullptr), device_context_(nullptr) {}
   virtual ~DeviceResManager() = default;
 
   // Initialize the device resource manager.
@@ -247,10 +247,10 @@ class DimReduce(Cell):
             _save_weight(self.gk_last, self.gk_last_back)
             _save_weight(self.bk, self.bk_back)
 
-        clone = self._res_loss(old_grad, grad_proj, weight, weight_clone, rho)
+        clone = self._res_loss(old_grad, grad_proj, weight, weight_clone, rho, dn)
         return F.depend(loss, clone)
 
-    def _res_loss(self, old_grad, grad_proj, weight, weight_clone, rho):
+    def _res_loss(self, old_grad, grad_proj, weight, weight_clone, rho, dn):
         """update loss"""
         update_grad = self.hyper_map(F.partial(_update_grad_res_momentum, self.gamma, self.alpha),
                                      self.grad_res_momentum, old_grad, grad_proj)
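The last hunk is the commit's one Python change: dn is now threaded through _res_loss explicitly, so the helper takes everything it uses as parameters rather than reaching into enclosing state, which is presumably what the linter objected to.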