// File: lammps/lib/gpu/pair_gpu_nbor.h
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Mike Brown (SNL), wmbrown@sandia.gov
Peng Wang (Nvidia), penwang@nvidia.com
Paul Crozier (SNL), pscrozi@sandia.gov
------------------------------------------------------------------------- */
#ifndef PAIR_GPU_NBOR_H
#define PAIR_GPU_NBOR_H
#include "nvc_macros.h"
#include "nvc_timer.h"
#include "nvc_memory.h"
#define IJ_SIZE 131072
/// Manages GPU neighbor-list storage and host->device transfer of
/// i-j interaction pairs for GPU-accelerated pair styles.
class PairGPUNbor {
 public:
  // NOTE(fix): initializer list reordered to match the declaration order of
  // the members (allocated, then _use_packing). C++ initializes members in
  // declaration order regardless of the list order, so the original order
  // (_use_packing first) triggered -Wreorder and could mislead maintainers.
  PairGPUNbor() : allocated(false), _use_packing(false) {}
  ~PairGPUNbor() { clear(); }

  /// Determine whether neighbor packing should be used
  /** If true, twice as much memory is reserved to allow packing neighbors by
    * atom for coalesced access after cutoff evaluation. This can be used
    * for expensive potentials where it is more efficient to evaluate the
    * cutoff separately from the potential in order to reduce thread divergence
    * for expensive routines **/
  void packing(const bool use_packing) { _use_packing=use_packing; }

  /// Called once to allocate memory
  bool init(const int ij_size, const int max_atoms, const int max_nbors);

  /// Resize storage for a new local atom / neighbor count; success is set
  /// false on allocation failure.
  void resize(const int nlocal, const int max_nbor, bool &success);

  /// Free all memory on host and device
  void clear();

  /// Bytes per atom used on device
  int bytes_per_atom(const int max_nbors) const;

  /// Total host memory used by class
  double host_memory_usage() const;

  /// Reset neighbor data (first time or from a rebuild)
  void reset(const int inum, int *ilist, const int *numj, cudaStream_t &s);

  /// Add neighbor data from host
  /** Asynchronously copies num_ij entries from the pinned host_ij buffer to
    * the device ij array at offset ij_total (on stream s), then advances
    * ij_total. Caller is responsible for synchronizing the stream before
    * reusing host_ij. **/
  inline void add(const int num_ij, cudaStream_t &s)
    { host_ij.copy_to_device(ij.begin()+ij_total,num_ij,s); ij_total+=num_ij; }

  /// Pack neighbors satisfying cutoff by atom for coalesced access
  void pack_nbors(const int GX, const int BX, const int start,
                  const int inum, const int form_low, const int form_high);

  // ------------------------------- Data -------------------------------
  // Store IJ interactions on device
  NVC_VecI ij;
  // Buffer for moving ij data to GPU
  NVC_HostI host_ij;
  // --------------- Atom neighbors
  // 3 x n
  // - 1st row is i
  // - 2nd row is numj (number of neighbors)
  // - 3rd row is starting address in host_ij of neighbors
  NVC_VecI dev_nbor;
  // --------------- Timing Stuff
  NVCTimer time_nbor;
  // Running count of i-j entries already staged to the device via add()
  int ij_total;

 private:
  bool allocated, _use_packing;
};
#endif