/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov

   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */

/* ----------------------------------------------------------------------
   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
------------------------------------------------------------------------- */

#ifndef PAIR_GPU_DEVICE_H
#define PAIR_GPU_DEVICE_H

#include "pair_gpu_atom.h"
#include "pair_gpu_nbor.h"
#include "mpi.h"
#include <sstream>
#include "stdio.h"
#include <string>

template <class numtyp, class acctyp>
class PairGPUDevice {
 public:
  PairGPUDevice();
  ~PairGPUDevice();

  /// Initialize the device for use by this process
  /** Sets up a per-device MPI communicator for load balancing and initializes
    * the device (>=first_gpu and <=last_gpu) that this proc will be using **/
  bool init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
                   const int last_gpu, const int gpu_mode,
                   const double particle_split, const int nthreads);

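  // Usage sketch (hypothetical host-side caller; the communicators, device
  // range, and split value below are illustrative assumptions, not the values
  // LAMMPS necessarily passes):
  //   PairGPUDevice<float,float> device;
  //   bool ok = device.init_device(MPI_COMM_WORLD, MPI_COMM_WORLD, 0, 0,
  //                                PairGPUDevice<float,float>::GPU_NEIGH,
  //                                1.0, 1);
  //   if (!ok) { /* abort: no usable accelerator for this process */ }
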
  /// Initialize the device for Atom and Neighbor storage
  /** \param rot True if quaternions need to be stored
    * \param nlocal Total number of local particles to allocate memory for
    * \param host_nlocal Initial number of host particles to allocate memory for
    * \param nall Total number of local+ghost particles
    * \param gpu_nbor True if neighboring is performed on device
    * \param gpu_host 0 if host will not perform force calculations,
    *                 1 if gpu_nbor is true, and host needs a half nbor list,
    *                 2 if gpu_nbor is true, and host needs a full nbor list
    * \param max_nbors Initial number of rows in the neighbor matrix
    * \param cell_size cutoff+skin
    * \param pre_cut True if the cutoff test will be performed in a separate
    *                kernel from the force kernel **/
  bool init(const bool charge, const bool rot, const int nlocal,
            const int host_nlocal, const int nall, const int maxspecial,
            const bool gpu_nbor, const int gpu_host, const int max_nbors,
            const double cell_size, const bool pre_cut);

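  // Usage sketch (the argument values are illustrative assumptions; the real
  // values come from the host pair style and neighbor settings):
  //   bool ok = device.init(/*charge=*/true, /*rot=*/false, nlocal,
  //                         /*host_nlocal=*/0, nlocal + nghost,
  //                         /*maxspecial=*/0, /*gpu_nbor=*/true,
  //                         /*gpu_host=*/0, /*max_nbors=*/300,
  //                         /*cell_size=*/cutoff + skin, /*pre_cut=*/false);
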
  /// Output a message for pair_style acceleration with device stats
  void init_message(FILE *screen, const char *name,
                    const int first_gpu, const int last_gpu);

  /// Output a message with timing information
  void output_times(UCL_Timer &time_pair, const double avg_split,
                    const double max_bytes, FILE *screen);

  /// Clear all memory on host and device associated with atom and nbor data
  void clear();

  /// Clear all memory on host and device
  void clear_device();

  /// Start timer on host
  inline void start_host_timer() { _cpu_full=MPI_Wtime(); }

  /// Stop timer on host
  inline void stop_host_timer() { _cpu_full=MPI_Wtime()-_cpu_full; }

  /// Return host time
  inline double host_time() { return _cpu_full; }

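  // Usage sketch (hypothetical call site; the host-side work being timed is
  // an assumption for illustration):
  //   device.start_host_timer();
  //   compute_host_forces();          // CPU portion of the force calculation
  //   device.stop_host_timer();
  //   double cpu_seconds = device.host_time();
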
  /// Return host memory usage in bytes
  double host_memory_usage() const;

  /// Return the number of procs sharing a device (size of device communicator)
  inline int procs_per_gpu() const { return _procs_per_gpu; }
  /// Return the number of threads per proc
  inline int num_threads() const { return _nthreads; }
  /// My rank within all processes
  inline int world_me() const { return _world_me; }
  /// Total number of processes
  inline int world_size() const { return _world_size; }
  /// MPI Barrier for world
  inline void world_barrier() { MPI_Barrier(_comm_world); }
  /// Return the replica MPI communicator
  inline MPI_Comm & replica() { return _comm_replica; }
  /// My rank within replica communicator
  inline int replica_me() const { return _replica_me; }
  /// Number of procs in replica communicator
  inline int replica_size() const { return _replica_size; }
  /// Return the per-GPU MPI communicator
  inline MPI_Comm & gpu_comm() { return _comm_gpu; }
  /// Return my rank in the device communicator
  inline int gpu_rank() const { return _gpu_rank; }
  /// MPI Barrier for gpu
  inline void gpu_barrier() { MPI_Barrier(_comm_gpu); }
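
  // Usage sketch: processes sharing one device can take turns with
  // host-device transfers by rank (hypothetical caller; the loop body is an
  // assumption):
  //   for (int i = 0; i < device.procs_per_gpu(); i++) {
  //     if (i == device.gpu_rank())
  //       pack_and_upload_my_atoms();
  //     device.gpu_barrier();
  //   }
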
  /// Return the 'mode' for acceleration: GPU_FORCE or GPU_NEIGH
  inline int gpu_mode() const { return _gpu_mode; }
  /// Index of first device used by a node
  inline int first_device() const { return _first_device; }
  /// Index of last device used by a node
  inline int last_device() const { return _last_device; }
  /// Particle split defined in fix
  inline double particle_split() const { return _particle_split; }
  /// Return the initialization count for the device
  inline int init_count() const { return _init_count; }

  // -------------------------- DEVICE DATA -------------------------

  /// Geryon Device
  UCL_Device *gpu;

  enum{GPU_FORCE, GPU_NEIGH};

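  // Usage sketch: a caller can branch on the acceleration mode to decide
  // where the neighbor list is built (hypothetical caller; the function names
  // are assumptions):
  //   if (device.gpu_mode() == PairGPUDevice<float,float>::GPU_NEIGH)
  //     build_neighbor_list_on_device();
  //   else  // GPU_FORCE: neighbor list is built on the host
  //     copy_host_neighbor_list_to_device();
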
  // --------------------------- ATOM DATA --------------------------

  /// Atom Data
  PairGPUAtom<numtyp,acctyp> atom;

  // --------------------------- NBOR DATA ----------------------------

  /// Neighbor Data
  PairGPUNbor nbor;

 private:
  int _init_count;
  bool _device_init;
  MPI_Comm _comm_world, _comm_replica, _comm_gpu;
  int _procs_per_gpu, _gpu_rank, _world_me, _world_size, _replica_me,
      _replica_size;
  int _gpu_mode, _first_device, _last_device, _nthreads;
  double _particle_split;
  double _cpu_full;

  /// Convert a value to a string (numeric output limited to a precision of 2)
  template <class t>
  inline std::string toa(const t& in) {
    std::ostringstream o;
    o.precision(2);
    o << in;
    return o.str();
  }

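  // For example, toa(_particle_split) yields a short string such as "0.75",
  // suitable for the device/statistics messages printed by init_message()
  // (illustrative value; actual output depends on the input).
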
};

#endif