Code Warning Clean

suxin 2022-07-24 17:40:07 +08:00 committed by suxin
parent d24f4e65e9
commit dc19a515cd
11 changed files with 50 additions and 51 deletions

View File

@@ -167,7 +167,7 @@ void MPICollective::AssignLocalRankSize(const std::string &name, const std::vect
CHECK_RET(
MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, all_host_hashs.data(), sizeof(size_t), MPI_BYTE, mpi_group_comm),
MPI_SUCCESS, "MPI_Allgather host hash failed.");
-int local_rank_size = std::count(all_host_hashs.begin(), all_host_hashs.end(), host_hash);
+int local_rank_size = static_cast<int>(std::count(all_host_hashs.begin(), all_host_hashs.end(), host_hash));
std::get<local_rank_size_index>(group_info_[name]) = local_rank_size;
std::vector<int> group_world_ranks(group_ranks.begin(), group_ranks.end());
world_map_[name] = group_world_ranks;
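The cast added here addresses a narrowing warning: std::count returns the container's iterator difference_type (std::ptrdiff_t for std::vector), so assigning the result straight to int narrows under -Wconversion-style checks. A minimal standalone sketch of the pattern, independent of the MindSpore sources:

```cpp
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
  std::vector<std::size_t> hashes = {1, 2, 1, 3, 1};
  // std::count returns std::ptrdiff_t here; assigning it to int without a
  // cast is an implicit narrowing conversion that -Wconversion flags.
  int local_rank_size =
      static_cast<int>(std::count(hashes.begin(), hashes.end(), std::size_t{1}));
  std::cout << local_rank_size << '\n';  // prints 3
  return 0;
}
```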

View File

@@ -18,7 +18,6 @@
#define MINDSPORE_CCSRC_RUNTIME_DEVICE_ASCEND_GE_RUNTIME_TASK_LABEL_GOTO_TASK_H_
#include <memory>
#include <vector>
-#include <map>
#include <mutex>
#include "plugin/device/ascend/hal/device/ge_runtime/task/task.h"

View File

@@ -109,7 +109,7 @@ bool CanConvertDefaultShapeToNZ(const ShapeVector &shape) {
if (i > 2) {
break;
}
-if (SizeToLong(shape[shape.size() - i]) != 1 && SizeToLong(shape[shape.size() - i] % kCubeSize != 0)) {
+if (SizeToInt(shape[shape.size() - i]) != 1 && SizeToInt(shape[shape.size() - i] % kCubeSize != 0)) {
return false;
}
}
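SizeToInt replaces SizeToLong so both operands are compared in the same signed width. A hedged sketch of what such a checked size_t-to-int helper typically looks like; the actual MindSpore utility lives in its conversion headers and may differ in naming and error handling:

```cpp
#include <climits>
#include <cstddef>
#include <stdexcept>

// Assumed shape of a checked narrowing helper in the spirit of SizeToInt;
// the real implementation is not reproduced here.
inline int CheckedSizeToInt(std::size_t u) {
  if (u > static_cast<std::size_t>(INT_MAX)) {
    throw std::out_of_range("size_t value does not fit in int");
  }
  return static_cast<int>(u);
}

int main() {
  std::size_t dim = 16;
  return CheckedSizeToInt(dim) % 16;  // 0
}
```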

View File

@@ -32,7 +32,7 @@ namespace device {
namespace ascend {
class AscendDeviceResManager : public DeviceResManager {
public:
-AscendDeviceResManager() : mem_manager_(nullptr) {}
+AscendDeviceResManager() : compute_stream_(nullptr), communication_stream_(nullptr), mem_manager_(nullptr) {}
~AscendDeviceResManager() override = default;
void Initialize() override;
@@ -54,17 +54,17 @@ class AscendDeviceResManager : public DeviceResManager {
// Create concrete device address according different device type.
DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format, TypeId type_id,
-const ShapeVector &shape = ShapeVector()) const override;
+const ShapeVector &shape) const override;
// Synchronize stream, device such as GPU and Ascend need stream to launch kernel asynchronously,
// using 'SyncStream' to block thread and wait for completing all tasks in stream.
// Devices that do not need stream could ignore the implementation of this function.
-bool SyncStream(size_t stream_id = 0) const override;
+bool SyncStream(size_t stream_id) const override;
-// Really create an ascend stream.
-bool CreateStream(void **stream) const;
protected:
+// Really create an ascend stream.
+bool CreateStream(void **stream) const override;
// Really destroy an ascend stream.
bool DestroyStream(void *stream) const override;
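Two warnings are cleaned up in this header: uninitialized pointer members gain explicit nullptr initializers, and default arguments are dropped from virtual overrides. The latter matters because a default argument is bound statically to the declared type of the call expression, not dispatched with the object, so a default on an override can silently disagree with the base (clang-tidy's google-default-arguments check flags this). A small standalone illustration:

```cpp
#include <iostream>

struct Base {
  virtual ~Base() = default;
  virtual void Sync(int stream_id = 0) const {
    std::cout << "Base " << stream_id << '\n';
  }
};

struct Derived : Base {
  // No default here: defaults on overrides are resolved from the static
  // type at the call site, not the dynamic type of the object.
  void Sync(int stream_id) const override {
    std::cout << "Derived " << stream_id << '\n';
  }
};

int main() {
  Derived d;
  const Base &b = d;
  b.Sync();  // prints "Derived 0": dynamic dispatch, *static* default
  return 0;
}
```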

View File

@@ -92,7 +92,8 @@ void UnfoldRecursiveExecOrder(KernelGraph *kernel_graph) {
}
auto label_id = common::AnfAlgo::GetNodeAttr<uint32_t>(kernel_cnodes[i], kAttrLabelIndex);
std::vector<CNodePtr> back;
-auto front = HandleRecursiveCall(kernel_cnodes, label_id, &i, &back);
+auto index = i;
+auto front = HandleRecursiveCall(kernel_cnodes, label_id, &index, &back);
mem_reuse_order.insert(mem_reuse_order.end(), front.begin(), front.end());
mem_reuse_order.insert(mem_reuse_order.end(), back.begin(), back.end());
}
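Copying the loop counter into a local before taking its address keeps HandleRecursiveCall from advancing the induction variable through the pointer, which is the pattern static checkers report as "loop variable modified in loop body". A reduced sketch with a hypothetical callee:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-in for a callee that may advance the cursor it is given.
void Consume(const std::vector<int> &items, uint32_t *cursor) {
  if (*cursor + 1 < items.size()) {
    ++(*cursor);  // mutation happens through the pointer
  }
}

int main() {
  std::vector<int> items = {10, 20, 30, 40};
  for (uint32_t i = 0; i < items.size(); ++i) {
    auto index = i;          // copy first, as in the change above,
    Consume(items, &index);  // so the loop variable itself is never mutated
    std::cout << items[index] << '\n';
  }
  return 0;
}
```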

View File

@@ -49,7 +49,7 @@ class AscendGraphExecutor : public GraphExecutor {
// compile graph interface
void UpdateExecOrder(const KernelGraphPtr &graph) const;
void AllocateGraphMemory(const NotNull<KernelGraphPtr> &root_graph) const;
-void AssignInputMemory(const NotNull<KernelGraphPtr> &graph, NotNull<std::set<KernelGraphPtr> *> memo) const;
+void AssignInputMemory(const NotNull<KernelGraphPtr> &graph, NotNull<std::set<KernelGraphPtr> *> const memo) const;
void LoadModel(const NotNull<KernelGraphPtr> &root_graph) const;
// LaunchGraph interface

View File

@@ -302,7 +302,7 @@ bool AscendKernelExecutor::PySyncRuning() const {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
if ((ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kPynativeMode) &&
-ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE) && !res_manager_->SyncStream()) {
+ms_context->get_param<bool>(MS_CTX_ENABLE_PYNATIVE_SYNCHRONIZE) && !res_manager_->SyncStream(0)) {
return false;
}
return true;
@@ -414,12 +414,12 @@ bool AscendKernelExecutor::LaunchKernel(const CNodePtr &kernel, const vector<Add
auto profiler_inst = profiler::ascend::PynativeProfiler::GetInstance();
MS_EXCEPTION_IF_NULL(profiler_inst);
std::thread::id t_id = std::this_thread::get_id();
-(void)profiler_inst->OpDataProducerBegin(res_manager_->runtime_instance_, stream, t_id,
-kernel->fullname_with_scope(), is_dynamic_shape);
+profiler_inst->OpDataProducerBegin(res_manager_->runtime_instance_, stream, t_id, kernel->fullname_with_scope(),
+is_dynamic_shape);
#endif
ret = kernel_mod->Launch(real_inputs, workspace, outputs, stream);
#ifndef ENABLE_SECURITY
-(void)profiler_inst->OpDataProducerEnd(t_id, is_dynamic_shape);
+profiler_inst->OpDataProducerEnd(t_id, is_dynamic_shape);
#endif
if (!ret) {
MS_LOG(ERROR) << "Launch kernel failed, kernel full name: " << kernel->fullname_with_scope();
@@ -433,7 +433,7 @@ bool AscendKernelExecutor::LaunchKernel(const CNodePtr &kernel, const vector<Add
if ((profiler_manage_instance->GetNetDynamicShapeStatus() ||
ms_context->get_param<int>(MS_CTX_EXECUTION_MODE) == kGraphMode) &&
ascend_instance->GetEnableFlag()) {
-ascend_instance->GetNodeTaskIdStreamId(kernel, graph_id, device_id, kernel_type);
+ascend_instance->GetNodeTaskIdStreamId(kernel, graph_id, UintToInt(device_id), kernel_type);
}
return PySyncRuning();
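Two related cleanups in this file: SyncStream is now called with an explicit stream id because the default argument was removed from the virtual declaration (see the header change above), and the (void) casts are dropped from the profiler calls, presumably because those functions return nothing, so there is no result to discard. A sketch of when a (void) cast is and is not meaningful:

```cpp
#include <cstdio>

int Prepare() { return 42; }                    // non-void result
void Record(int v) { std::printf("%d\n", v); }  // no result to discard

int main() {
  (void)Prepare();  // deliberate, documented discard of a non-void result
  Record(7);        // a (void) cast here would be redundant noise
  return 0;
}
```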

View File

@@ -315,7 +315,7 @@ void GeGraphExecutor::AllocOutputHostMemory(const KernelGraphPtr &kernel_graph)
}
}
-bool GeGraphExecutor::CompileGraph(const FuncGraphPtr &graph, const std::map<string, string> &) {
+bool GeGraphExecutor::CompileGraph(const FuncGraphPtr &graph, const std::map<string, string> & /* compile_options */) {
MS_EXCEPTION_IF_NULL(graph);
KernelGraphPtr kg = std::dynamic_pointer_cast<session::KernelGraph>(graph);
MS_EXCEPTION_IF_NULL(kg);
@@ -334,14 +334,15 @@ bool GeGraphExecutor::CompileGraph(const FuncGraphPtr &graph, const std::map<str
return true;
}
-bool GeGraphExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &,
-std::vector<tensor::Tensor> *, const std::map<string, string> &) {
+bool GeGraphExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &inputs,
+std::vector<tensor::Tensor> *outputs,
+const std::map<string, string> & /* compile_options */) {
MS_EXCEPTION_IF_NULL(graph);
MS_LOG(INFO) << "GE run graph " << graph->ToString() << " start.";
// copy input from device to host
-const auto &inputs = graph->get_inputs();
+const auto &cur_inputs = graph->get_inputs();
std::vector<tensor::TensorPtr> input_tensors;
-for (const auto &input : inputs) {
+for (const auto &input : cur_inputs) {
MS_EXCEPTION_IF_NULL(input);
auto output_addr = AnfAlgo::GetMutableOutputAddr(input, 0);
auto shapes = trans::GetRuntimePaddingShape(input, 0);
@@ -384,30 +385,31 @@ bool GeGraphExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<tens
<< ge_outputs.size();
}
// copy output from host to device
-auto outputs = common::AnfAlgo::GetAllOutputWithIndex(graph->output());
-if (outputs.size() != ge_outputs.size()) {
-MS_LOG(EXCEPTION) << "Invalid output size, graph's size " << outputs.size() << " tensor size " << ge_outputs.size();
+auto graph_outputs = common::AnfAlgo::GetAllOutputWithIndex(graph->output());
+if (graph_outputs.size() != ge_outputs.size()) {
+MS_LOG(EXCEPTION) << "Invalid output size, graph's size " << graph_outputs.size() << " tensor size "
+<< ge_outputs.size();
}
std::vector<ShapeVector> output_shapes;
-for (size_t i = 0; i < outputs.size(); ++i) {
-const auto &[output_node, idx] = outputs[i];
+for (size_t i = 0; i < graph_outputs.size(); ++i) {
+const auto &[output_node, idx] = graph_outputs[i];
const auto &tensor = ge_outputs[i];
auto output_addr = AnfAlgo::GetMutableOutputAddr(output_node, idx);
output_addr->set_ptr(device_context_->device_res_manager_->AllocateMemory(tensor->GetSize()));
output_addr->SetSize(tensor->GetSize());
output_addr->set_is_ptr_persisted(false);
-if (output_addr->GetSize() < LongToSize(tensor->GetSize())) {
+if (output_addr->GetSize() < LongToSize(UlongToLong(tensor->GetSize()))) {
MS_LOG(EXCEPTION) << "Output node " << output_node->DebugString() << "'s mem size " << output_addr->GetSize()
<< " is less than actual output size " << tensor->GetSize();
}
// memcpy_s does not support data that more than 2GB
-(void)memcpy(output_addr->GetMutablePtr(), tensor->GetData(), tensor->GetSize());
+(void)memcpy(reinterpret_cast<uint8_t *>(output_addr->GetMutablePtr()), tensor->GetData(), tensor->GetSize());
auto actual_shapes = tensor->GetTensorDesc().GetShape().GetDims();
output_shapes.emplace_back(std::move(actual_shapes));
}
-UpdateOutputNodeShape(outputs, me_types, output_shapes);
+UpdateOutputNodeShape(graph_outputs, me_types, output_shapes);
MS_LOG(INFO) << "GE run graph end.";
return true;
}
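The renames in this hunk (inputs to cur_inputs, outputs to graph_outputs) avoid shadowing: RunGraph now names its inputs/outputs parameters, so locals with the same names would hide them and trip -Wshadow-style diagnostics. A minimal reproduction of the pattern:

```cpp
#include <iostream>
#include <vector>

// A local also named 'outputs' in this function would hide the parameter
// and warn under -Wshadow; renaming the local (as the hunk above does with
// graph_outputs) keeps both visible and the checker quiet.
void RunGraph(std::vector<int> *outputs) {
  std::vector<int> graph_outputs = {1, 2, 3};
  *outputs = graph_outputs;
}

int main() {
  std::vector<int> out;
  RunGraph(&out);
  std::cout << out.size() << '\n';  // prints 3
  return 0;
}
```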
@@ -476,7 +478,7 @@ bool GeDeviceContext::InitGe(const std::shared_ptr<MsContext> &inst_context) {
return true;
}
-if (inst_context->get_param<uint32_t>(MS_CTX_GE_REF)) {
+if (static_cast<bool>(inst_context->get_param<uint32_t>(MS_CTX_GE_REF))) {
inst_context->increase_param<uint32_t>(MS_CTX_GE_REF);
return true;
}
@@ -506,7 +508,7 @@ void GeDeviceContext::GetGeOptions(const std::shared_ptr<MsContext> &ms_context_
if (!dump_env.empty()) {
auto &dump_parser = DumpJsonParser::GetInstance();
dump_parser.Parse();
-(*ge_options)["ge.exec.enableDump"] = std::to_string(dump_parser.async_dump_enabled());
+(*ge_options)["ge.exec.enableDump"] = std::to_string(static_cast<int>(dump_parser.async_dump_enabled()));
(*ge_options)["ge.exec.dumpPath"] = dump_parser.path();
// Parse() make sure that input_output is less than 3.
(*ge_options)["ge.exec.dumpMode"] = kGeDumpMode[dump_parser.input_output()];
@@ -519,10 +521,8 @@ void GeDeviceContext::GetGeOptions(const std::shared_ptr<MsContext> &ms_context_
<< ", dump step is " << dump_parser.iteration_string() << ".";
}
auto profiler_manager = profiler::ProfilerManager::GetInstance();
-if (profiler_manager == nullptr) {
-MS_LOG(EXCEPTION) << "Profiler manager is nullptr";
-}
-(*ge_options)["ge.exec.profilingMode"] = std::to_string(profiler_manager->GetProfilingEnableFlag());
+MS_EXCEPTION_IF_NULL(profiler_manager);
+(*ge_options)["ge.exec.profilingMode"] = std::to_string(static_cast<int>(profiler_manager->GetProfilingEnableFlag()));
if (profiler_manager->GetProfilingEnableFlag()) {
(*ge_options)["ge.exec.profilingOptions"] = profiler_manager->GetProfilingOptions();
}
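Besides folding the manual null check into MS_EXCEPTION_IF_NULL, the static_cast<int> makes the bool-to-string conversion explicit: std::to_string has no bool overload, so a bool argument is silently promoted to int, which implicit-bool-conversion checkers flag. A standalone sketch:

```cpp
#include <iostream>
#include <string>

int main() {
  bool profiling_enabled = true;
  // std::to_string(profiling_enabled) compiles via bool -> int promotion,
  // but the explicit cast states the intended "0"/"1" encoding.
  std::string mode = std::to_string(static_cast<int>(profiling_enabled));
  std::cout << mode << '\n';  // prints 1
  return 0;
}
```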
@@ -616,13 +616,13 @@ void GeDeviceContext::SetDisableReuseMemoryFlag(std::map<std::string, std::strin
}
}
-void GeDeviceContext::SetHcclOptions(const std::shared_ptr<MsContext> &ms_context_ptr,
+void GeDeviceContext::SetHcclOptions(const std::shared_ptr<MsContext> &inst_context,
std::map<std::string, std::string> *ge_options) {
-MS_EXCEPTION_IF_NULL(ms_context_ptr);
+MS_EXCEPTION_IF_NULL(inst_context);
MS_EXCEPTION_IF_NULL(ge_options);
auto env_table_file = common::GetEnv("RANK_TABLE_FILE");
auto env_rank_id = common::GetEnv("RANK_ID");
-auto env_device_id = std::to_string(ms_context_ptr->get_param<uint32_t>(MS_CTX_DEVICE_ID));
+auto env_device_id = std::to_string(inst_context->get_param<uint32_t>(MS_CTX_DEVICE_ID));
if (!(env_table_file.empty() || env_rank_id.empty())) {
MS_LOG(INFO) << "Initialize Ge for distribute parameter";
MS_LOG(INFO) << "Use hccl, make sure hccl lib is set in OPTION_EXEC_EXTERN_PLUGIN_PATH.";
@@ -652,14 +652,14 @@ void GeDeviceContext::SetHcclOptions(const std::shared_ptr<MsContext> &ms_contex
}
}
-bool GeDeviceContext::FinalizeGe(const std::shared_ptr<MsContext> &ms_context_ptr) {
-MS_EXCEPTION_IF_NULL(ms_context_ptr);
-if (ms_context_ptr->get_param<uint32_t>(MS_CTX_GE_REF) == 0) {
+bool GeDeviceContext::FinalizeGe(const std::shared_ptr<MsContext> &inst_context) {
+MS_EXCEPTION_IF_NULL(inst_context);
+if (inst_context->get_param<uint32_t>(MS_CTX_GE_REF) == 0) {
return true;
}
-ms_context_ptr->decrease_param<uint32_t>(MS_CTX_GE_REF);
-if (ms_context_ptr->get_param<uint32_t>(MS_CTX_GE_REF) == 0) {
-ms_context_ptr->set_param<uint32_t>(MS_CTX_GE_REF, 0);
+inst_context->decrease_param<uint32_t>(MS_CTX_GE_REF);
+if (inst_context->get_param<uint32_t>(MS_CTX_GE_REF) == 0) {
+inst_context->set_param<uint32_t>(MS_CTX_GE_REF, 0);
try {
transform::ClearGeSessionAndRunner();
} catch (const std::exception &e) {
@@ -671,10 +671,10 @@ bool GeDeviceContext::FinalizeGe(const std::shared_ptr<MsContext> &ms_context_pt
if (ge::GEFinalize() != ge::GRAPH_SUCCESS) {
MS_LOG(WARNING) << "Finalize GE failed!";
}
-ms_context_ptr->set_param<bool>(MS_CTX_IS_PYNATIVE_GE_INIT, false);
+inst_context->set_param<bool>(MS_CTX_IS_PYNATIVE_GE_INIT, false);
} else {
MS_LOG(INFO) << "Ge is used, no need to finalize, tsd reference = "
-<< ms_context_ptr->get_param<uint32_t>(MS_CTX_GE_REF) << ".";
+<< inst_context->get_param<uint32_t>(MS_CTX_GE_REF) << ".";
}
return true;
}

View File

@@ -41,11 +41,10 @@ class GeDeviceResManager : public DeviceResManager {
std::vector<void *> AllocateContinuousMemory(const std::vector<size_t> &size_list) const override;
DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format, TypeId type_id,
-const ShapeVector &shape = ShapeVector()) const override;
+const ShapeVector &shape) const override;
static void CreateSessionAndGraphRunner(bool is_training);
protected:
// Relevant function to allocate and free device memory of raw ptr.
void *AllocateMemory(size_t size) const override;
void FreeMemory(void *ptr) const override;
View File

@@ -91,7 +91,7 @@ using DeviceContextPtr = std::shared_ptr<DeviceContext>;
class DeviceResManager {
public:
-DeviceResManager() : collective_comm_lib_(nullptr) {}
+DeviceResManager() : collective_comm_lib_(nullptr), device_context_(nullptr) {}
virtual ~DeviceResManager() = default;
// Initialize the device resource manager.
View File

@@ -247,10 +247,10 @@ class DimReduce(Cell):
_save_weight(self.gk_last, self.gk_last_back)
_save_weight(self.bk, self.bk_back)
-clone = self._res_loss(old_grad, grad_proj, weight, weight_clone, rho)
+clone = self._res_loss(old_grad, grad_proj, weight, weight_clone, rho, dn)
return F.depend(loss, clone)
-def _res_loss(self, old_grad, grad_proj, weight, weight_clone, rho):
+def _res_loss(self, old_grad, grad_proj, weight, weight_clone, rho, dn):
"""update loss"""
update_grad = self.hyper_map(F.partial(_update_grad_res_momentum, self.gamma, self.alpha),
self.grad_res_momentum, old_grad, grad_proj)