Replace SyncRun with ParallelLaunch for CPU kernels

This commit is contained in:
kswang 2022-05-09 15:21:24 +08:00
parent c29d6bb764
commit f93e971c2e
3 changed files with 3 additions and 3 deletions

View File

@ -280,7 +280,7 @@ void CPUKernelUtils::ParallelFor(const CTask &task, size_t count, float block_si
(void)tasks.emplace_back(block);
start += once_compute_size;
}
(void)common::ThreadPool::GetInstance().SyncRun(tasks);
ParallelLaunch(tasks);
}
// Search for best block_size to get best thread num : 1 2 4 8 16 23(32)

View File

@ -112,7 +112,7 @@ float FusedAdaFactorCpuKernelMod::CalcRMS(T *input, size_t elem_num) {
};
(void)tasks.emplace_back(block);
}
(void)common::ThreadPool::GetInstance().SyncRun(tasks);
ParallelLaunch(tasks);
auto rms = std::accumulate(block_sum.begin(), block_sum.end(), 0.0f);
rms = rms / elem_num;
return std::sqrt(rms);

View File

@ -396,7 +396,7 @@ void TransposeFwdCpuKernelMod::ParallelRun(const T *input_addr, T *output_addr,
};
(void)tasks.emplace_back(task);
}
(void)common::ThreadPool::GetInstance().SyncRun(tasks);
ParallelLaunch(tasks);
}
template <typename T>