forked from mindspore-Ecosystem/mindspore
replace SyncRun with ParallelLaunch for cpu kernel
This commit is contained in:
parent
c29d6bb764
commit
f93e971c2e
|
@@ -280,7 +280,7 @@ void CPUKernelUtils::ParallelFor(const CTask &task, size_t count, float block_si
|
|||
(void)tasks.emplace_back(block);
|
||||
start += once_compute_size;
|
||||
}
|
||||
- (void)common::ThreadPool::GetInstance().SyncRun(tasks);
|
||||
+ ParallelLaunch(tasks);
|
||||
}
|
||||
|
||||
// Search for best block_size to get best thread num : 1 2 4 8 16 23(32)
|
||||
|
|
|
@@ -112,7 +112,7 @@ float FusedAdaFactorCpuKernelMod::CalcRMS(T *input, size_t elem_num) {
|
|||
};
|
||||
(void)tasks.emplace_back(block);
|
||||
}
|
||||
- (void)common::ThreadPool::GetInstance().SyncRun(tasks);
|
||||
+ ParallelLaunch(tasks);
|
||||
auto rms = std::accumulate(block_sum.begin(), block_sum.end(), 0.0f);
|
||||
rms = rms / elem_num;
|
||||
return std::sqrt(rms);
|
||||
|
|
|
@@ -396,7 +396,7 @@ void TransposeFwdCpuKernelMod::ParallelRun(const T *input_addr, T *output_addr,
|
|||
};
|
||||
(void)tasks.emplace_back(task);
|
||||
}
|
||||
- (void)common::ThreadPool::GetInstance().SyncRun(tasks);
|
||||
+ ParallelLaunch(tasks);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
|
Loading…
Reference in New Issue