commit 4b7b646018
@@ -172,7 +172,7 @@ void DuplexPipe::SignalHandler::SetAlarm(unsigned int interval_secs) const {
   alarm(interval_secs);
 }
 
-void DuplexPipe::SignalHandler::CancelAlarm() const { alarm(0); }
+void DuplexPipe::SignalHandler::CancelAlarm() const { (void)alarm(0); }
 
 void DuplexPipe::SignalHandler::SigAlarmHandler(int sig) {
   DP_INFO << "Signal: " << sig << ", child_pid_: " << child_pid_;
@@ -104,7 +104,7 @@ bool ThreadPool::SyncRun(const std::vector<Task> &tasks) {
 }
 
 ThreadPool &ThreadPool::GetInstance() {
-  static ThreadPool instance;
+  static ThreadPool instance{};
   return instance;
 }
 
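Note: the GetInstance hunk above keeps the function-local static singleton and only switches to explicit value-initialization. A minimal standalone sketch of the same pattern, using a hypothetical Counter class rather than MindSpore's ThreadPool:

// Hypothetical sketch, not MindSpore code: function-local static singleton.
// Since C++11 the first call constructs the instance exactly once, even with
// concurrent callers; the braces request value-initialization explicitly.
class Counter {
 public:
  static Counter &GetInstance() {
    static Counter instance{};
    return instance;
  }
  Counter(const Counter &) = delete;
  Counter &operator=(const Counter &) = delete;

 private:
  Counter() = default;
  int value_{0};
};
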
@@ -123,6 +123,11 @@ void ThreadPool::ClearThreadPool() {
   sync_run_threads_.clear();
 }
 
-ThreadPool::~ThreadPool() { ClearThreadPool(); }
+ThreadPool::~ThreadPool() {
+  try {
+    ClearThreadPool();
+  } catch (...) {
+  }
+}
 } // namespace common
 } // namespace mindspore
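Note: the destructor now swallows exceptions because an exception escaping a destructor terminates the program. A minimal sketch of the same rule, assuming a hypothetical Worker class (not MindSpore code):

#include <stdexcept>

// Hypothetical sketch: cleanup that might throw is wrapped so the exception
// cannot escape the destructor (which would call std::terminate()).
class Worker {
 public:
  ~Worker() {
    try {
      Shutdown();
    } catch (...) {
      // Deliberately swallowed: never let an exception propagate out of a destructor.
    }
  }

 private:
  void Shutdown() {
    if (!flushed_) {
      throw std::runtime_error("flush failed");  // simulated failure during cleanup
    }
  }
  bool flushed_{false};
};
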
@@ -38,7 +38,7 @@ void AscendBucket::AllocateAllReduceAddr() {
                       << " is not equal to bucket size:" << bucket_size_;
   }
 
-  auto total_size = 0;
+  size_t total_size = 0;
   std::vector<size_t> origin_size_list;
   for (auto &tensor : grad_tensor_list_) {
     MS_EXCEPTION_IF_NULL(tensor);
@@ -13,7 +13,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#define PATH_MAX 4096
 #include "runtime/device/ascend/ascend_kernel_runtime.h"
 #include <string>
 #include <vector>
@@ -71,6 +70,7 @@ constexpr uint32_t kTupleStreamId = 1;
 constexpr uint32_t kTupleArgs = 2;
 constexpr uint32_t kProfilingMaxTaskIdInStream = 65531;
 constexpr auto kModuleName = "MindSpore";
+constexpr size_t kPathMax = 4096;
 
 namespace mindspore::device::ascend {
 static thread_local rtContext_t thread_local_rt_context{nullptr};
@@ -510,16 +510,16 @@ void AscendKernelRuntime::LaunchDataDump(GraphId graph_id) {
 void AscendKernelRuntime::TaskFailCallback(rtExceptionInfo *task_fail_info) {
   MS_EXCEPTION_IF_NULL(task_fail_info);
   static std::mutex exception_mutex;
+  constexpr uint32_t kOverflowThreshold = 5;
   std::lock_guard<std::mutex> lock(exception_mutex);
   if (task_fail_info->retcode == ACL_ERROR_RT_AICORE_OVER_FLOW) {
     auto node = AscendKernelRuntime::GetErrorNodeName(task_fail_info->streamid, task_fail_info->taskid);
 
     if (!node) {
       MS_LOG(WARNING) << "Node run task overflow, node name is unknown.";
     } else {
       auto key = std::to_string(task_fail_info->streamid) + std::to_string(task_fail_info->taskid) +
                  std::to_string(current_graph_->graph_id());
-      if (overflow_tasks_.find(key) == overflow_tasks_.end() || overflow_tasks_[key] == 5) {
+      if (overflow_tasks_.find(key) == overflow_tasks_.end() || overflow_tasks_[key] == kOverflowThreshold) {
         // print overflow info
         MS_LOG(WARNING) << "Node run task overflow, node name: " << node->fullname_with_scope()
                         << "Task overflow infos task_id: " << task_fail_info->taskid
@@ -820,7 +820,7 @@ bool AscendKernelRuntime::HcclInit() {
       return false;
     }
   }
-  if (strlen(config_path_str) > PATH_MAX) {
+  if (strlen(config_path_str) > kPathMax) {
     MS_LOG(ERROR) << "File path oversize";
     return false;
   }
@@ -949,7 +949,7 @@ int AscendKernelRuntime::DeleteDumpFile(std::string path) {
 }
 
 std::string AscendKernelRuntime::GetRealPath(std::string path) {
-  char real_path_mem[PATH_MAX] = {0};
+  char real_path_mem[kPathMax] = {0};
   char *real_path_ret = realpath(path.c_str(), real_path_mem);
   if (real_path_ret == nullptr) {
     return "";
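Note: these hunks replace the file-local #define PATH_MAX 4096, which could collide with the macro from <limits.h>, with the constant kPathMax. A self-contained sketch of the resulting realpath usage, as an illustration rather than the exact MindSpore code:

#include <cstdlib>
#include <string>

namespace {
constexpr size_t kPathMax = 4096;  // named constant instead of redefining PATH_MAX
}

// Returns the canonical absolute path, or "" if resolution fails.
std::string GetRealPath(const std::string &path) {
  char real_path_mem[kPathMax] = {0};
  char *real_path_ret = realpath(path.c_str(), real_path_mem);
  if (real_path_ret == nullptr) {
    return "";
  }
  return std::string(real_path_mem);
}
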
@@ -115,7 +115,7 @@ uint8_t *AscendMemoryManager::MallocStaticMem(size_t size, bool communication_me
       MS_LOG(INFO) << "Add graph memory node for static memory profiling, graph id is " << graph_id;
     }
 
-    node->AddStaticMemorySize(align_size);
+    node->AddStaticMemorySize(SizeToUint(align_size));
   }
 
   if (communication_mem) {
@@ -471,7 +471,7 @@ bool AscendStreamAssign::FinetuneSubgraphExecOrder(vector<CNodePtr> *cnodes) {
   vector<CNodePtr> ori_cnodes(cnodes->begin(), cnodes->end());
   cnodes->clear();
   vector<CNodePtr> atomic_addr_clean;
-  for (auto iter = ori_cnodes.begin(); iter < ori_cnodes.end(); iter++) {
+  for (auto iter = ori_cnodes.begin(); iter < ori_cnodes.end(); ++iter) {
     if (AnfAlgo::GetCNodeName(*iter) == kAtomicAddrCleanOpName) {
       atomic_addr_clean.emplace_back(*iter);
       continue;
@@ -539,7 +539,7 @@ void AscendStreamAssign::TrailingTimeOptimizationByReorder(const NotNull<KernelG
   auto pos = last_grad_pos + moved_forward_cnodes.size() + moved_backward_cnodes.size() + 1;
   while (pos < cnode_ptr_list.end() && AnfAlgo::GetGraphId((*pos).get()) != subgraph_id) {
     cnodes.emplace_back(*pos);
-    pos++;
+    ++pos;
   }
 
   vector<CNodePtr> subgraph_cnodes;
@@ -553,7 +553,7 @@ void AscendStreamAssign::TrailingTimeOptimizationByReorder(const NotNull<KernelG
       subgraph_cnodes.insert(subgraph_cnodes.end(), moved_backward_cnodes.begin(), moved_backward_cnodes.end());
       subgraph_cnodes.emplace_back(*pos);
     }
-    pos++;
+    ++pos;
   }
 
   if (!FinetuneSubgraphExecOrder(&subgraph_cnodes) || subgraph_cnodes.empty()) {
@@ -2405,7 +2405,7 @@ void AscendStreamAssign::FindEventRelations(const NotNull<KernelGraphPtr> &graph
     if (!flag) {
       begin = event_map_.erase(begin);
     } else {
-      begin++;
+      ++begin;
     }
   }
 
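Note: the loop above is the standard erase-or-advance idiom for associative containers. A minimal sketch with a hypothetical std::map, not the event_map_ type used in MindSpore:

#include <map>
#include <string>

// Hypothetical sketch: erase() returns the iterator to the next element, so
// the iterator is only advanced manually when the current element is kept.
void DropEmptyValues(std::map<int, std::string> *m) {
  for (auto it = m->begin(); it != m->end();) {
    if (it->second.empty()) {
      it = m->erase(it);  // erased: continue from the returned iterator
    } else {
      ++it;  // kept: pre-increment avoids the temporary a post-increment makes
    }
  }
}
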
@@ -372,9 +372,9 @@ void CheckFormatsAndDtypes(const CNodePtr &kernel_node, const std::vector<AnfNod
     }
     auto user_format = AnfAlgo::GetInputFormat(node_user.first, IntToSize(node_user.second - 1));
     if (user_format != (*graph_input_format)[i]) {
-      MS_LOG(WARNING) << "Users of input: [" << i << "][" << input->DebugString(2) << " of ["
+      MS_LOG(WARNING) << "Users of input: [" << i << "][" << input->DebugString() << " of ["
                       << kernel_node->DebugString()
-                      << "] selected different format. we use defult: " << default_format;
+                      << "] selected different format. we use default: " << default_format;
       (*graph_input_format)[i] = default_format;
       (*need_update)[i] = true;
     }
@@ -385,7 +385,7 @@ void CheckFormatsAndDtypes(const CNodePtr &kernel_node, const std::vector<AnfNod
     }
 
     TypeId default_dtype = AnfAlgo::GetOutputInferDataType(input, 0);
-    MS_LOG(WARNING) << "Users of input: [" << i << "][" << input->DebugString(2) << " of ["
+    MS_LOG(WARNING) << "Users of input: [" << i << "][" << input->DebugString() << " of ["
                     << kernel_node->DebugString()
                     << "] selected different dtype. we use default: " << TypeIdLabel(default_dtype);
     (*graph_input_type)[i] = default_dtype;
@@ -296,7 +296,7 @@ void TaskGenerator::DumpTaskInfo(const std::string &real_filename) {
   OrderedMap<AnfNodePtr, int32_t> para_map;
   std::string path_string = real_path;
   ChangeFileMode(path_string, S_IRWXU);
-  SaveTaskDebugInfoToFile(real_path, task_debug_info_list_);
+  SaveTaskDebugInfoToFile(path_string, task_debug_info_list_);
   // set file mode to read only by user
   ChangeFileMode(path_string, S_IRUSR);
 }