2010-11-23 08:40:35 +08:00
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
2010-11-24 03:52:03 +08:00
2010-11-23 08:40:35 +08:00
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */
#include "pair_gpu_device.h"
#include <math.h>
/// Interval, in timesteps, between host/device balance calculations
/// (the calculation is also done during the first 10 measurements).
#define _HD_BALANCE_EVERY 25
/// NOTE(review): not referenced in the visible part of this file; presumably
/// the weight given to a new measurement when updating the split -- confirm.
#define _HD_BALANCE_WEIGHT 0.5
2011-05-02 23:02:52 +08:00
#define _HD_BALANCE_GAP 1.10
2010-11-23 08:40:35 +08:00
/// Host/device load balancer
template<class numtyp, class acctyp>
class PairGPUBalance {
inline PairGPUBalance() : _init_done(false), _measure_this_step(false) {}
inline ~PairGPUBalance() { clear(); }
/// Clear any old data and set up for a new LAMMPS run
2011-05-02 23:02:52 +08:00
inline void init(PairGPUDevice<numtyp, acctyp> *gpu, const bool gpu_nbor,
const double split);
2010-11-23 08:40:35 +08:00
/// Clear all host and device data
inline void clear() {
if (_init_done) {
2011-05-02 23:02:52 +08:00
/// Timestep counter accumulated since initialization
/** \return the current value of the internal _timestep counter **/
inline int timestep() {
  return _timestep;
}
2010-11-23 08:40:35 +08:00
/// Get a count of the number of particles host will handle for initial alloc
2011-05-02 23:02:52 +08:00
inline int first_host_count(const int nlocal, const double gpu_split,
const bool gpu_nbor) const {
2010-11-23 08:40:35 +08:00
int host_nlocal=0;
if (gpu_nbor && gpu_split!=1.0) {
if (gpu_split>0)
2011-05-02 23:02:52 +08:00
2010-11-23 08:40:35 +08:00
return host_nlocal;
/// Return the number of particles the device will handle this timestep
2011-05-02 23:02:52 +08:00
inline int get_gpu_count(const int ago, const int inum_full);
2010-11-23 08:40:35 +08:00
/// Return the average fraction of particles handled by device on all procs
inline double all_avg_split() {
if (_load_balance) {
double _all_avg_split=0.0;
2011-01-12 23:24:04 +08:00
2010-11-23 08:40:35 +08:00
return _all_avg_split/_avg_count;
} else
return _actual_split;
/// With CPU neighboring, let the device fraction grow on the 2nd timestep
/** \param ago value passed in by the caller
  * \return 0 when exactly one split has been averaged so far and the actual
  *         split is still below the desired split; otherwise \a ago unchanged **/
inline int ago_first(int ago) const {
  const bool single_sample = (_avg_count == 1);
  const bool below_target = (_actual_split < _desired_split);
  return (single_sample && below_target) ? 0 : ago;
}
/// Start the timer for asynchronous device execution
inline void start_timer() {
if (_measure_this_step) {
2011-01-12 23:24:04 +08:00
2011-05-02 23:02:52 +08:00
2010-11-23 08:40:35 +08:00
2011-01-12 23:24:04 +08:00
2010-11-23 08:40:35 +08:00
/// Stop the timer for asynchronous device execution
/** Does nothing unless this step is flagged for measurement **/
inline void stop_timer() {
  if (!_measure_this_step)
    return;
  _device_time.stop();
}
/// Calculate the new host/device split based on the cpu and device times
/** \note Only does calculation every _HD_BALANCE_EVERY timesteps
(and first 10) **/
2011-05-02 23:02:52 +08:00
inline void balance(const double cpu_time);
2010-11-23 08:40:35 +08:00
/// Calls balance() and then get_gpu_count()
2011-05-02 23:02:52 +08:00
inline int balance(const int ago,const int inum_full,const double cpu_time) {
return get_gpu_count(ago,inum_full);
2010-11-23 08:40:35 +08:00
PairGPUDevice<numtyp,acctyp> *_device;
UCL_Timer _device_time;
2011-05-02 23:02:52 +08:00
bool _init_done, _gpu_nbor;
2010-11-23 08:40:35 +08:00
bool _load_balance;
double _actual_split, _avg_split, _desired_split, _max_split;
int _avg_count;
bool _measure_this_step;
2011-05-02 23:02:52 +08:00
int _inum, _inum_full, _timestep;
2010-11-23 08:40:35 +08:00
#define PairGPUBalanceT PairGPUBalance<numtyp,acctyp>
template <class numtyp, class acctyp>
2011-05-02 23:02:52 +08:00
void PairGPUBalanceT::init(PairGPUDevice<numtyp, acctyp> *gpu,
const bool gpu_nbor, const double split) {
2010-11-23 08:40:35 +08:00
2011-05-02 23:02:52 +08:00
2010-11-23 08:40:35 +08:00
if (split<0.0) {
2011-05-02 23:02:52 +08:00
2010-11-23 08:40:35 +08:00
} else {
2011-05-02 23:02:52 +08:00
2010-11-23 08:40:35 +08:00
template <class numtyp, class acctyp>
2011-05-02 23:02:52 +08:00
int PairGPUBalanceT::get_gpu_count(const int ago, const int inum_full) {
2010-11-23 08:40:35 +08:00
if (_load_balance) {
2011-05-02 23:02:52 +08:00
if (_avg_count<11 || _timestep%_HD_BALANCE_EVERY==0) {
2010-11-23 08:40:35 +08:00
if (ago==0) {
if (_inum==0) _inum++;
2011-05-02 23:02:52 +08:00
2010-11-23 08:40:35 +08:00
return _inum;
template <class numtyp, class acctyp>
2011-05-02 23:02:52 +08:00
void PairGPUBalanceT::balance(const double cpu_time) {
2010-11-23 08:40:35 +08:00
if (_measure_this_step) {
2011-05-02 23:02:52 +08:00
double gpu_time=_device_time.seconds();
double max_gpu_time;
2010-11-23 08:40:35 +08:00
if (_inum_full==_inum) {
2011-05-02 23:02:52 +08:00
double cpu_time_per_atom=cpu_time/(_inum_full-_inum);
double cpu_other_time=_device->host_time()-cpu_time;
int host_inum=static_cast<int>((max_gpu_time-cpu_other_time)/
2010-11-23 08:40:35 +08:00
2011-05-02 23:02:52 +08:00
double split=static_cast<double>(_inum_full-host_inum)/_inum_full;
if (_desired_split>1.0)
if (_desired_split<0.0)
2010-11-23 08:40:35 +08:00
2011-05-02 23:02:52 +08:00
if (!_gpu_nbor) {
2010-11-23 08:40:35 +08:00
if (_desired_split<_max_split)
2011-05-02 23:02:52 +08:00
//std::cout << gpu_time << " " << max_gpu_time << " " << cpu_other_time << " " << cpu_time_per_atom << " " << cpu_time << " " << _desired_split << " " << host_inum << std::endl;
2010-11-23 08:40:35 +08:00