forked from lijiext/lammps
92 lines
3.1 KiB
C++
92 lines
3.1 KiB
C++
/***************************************************************************
                               pair_gpu_nbor.h
                             -------------------
                               W. Michael Brown

  Neighbor memory operations for LAMMPS GPU Library

 __________________________________________________________________________
    This file is part of the LAMMPS GPU Library
 __________________________________________________________________________

    begin                : Tue Aug 4 2009
    copyright            : (C) 2009 by W. Michael Brown
    email                : wmbrown@sandia.gov
 ***************************************************************************/
|
|
|
|
/* -----------------------------------------------------------------------
   Copyright (2009) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   ----------------------------------------------------------------------- */
|
|
|
|
#ifndef PAIR_GPU_NBOR_H
|
|
#define PAIR_GPU_NBOR_H
|
|
|
|
#include "nvc_macros.h"
|
|
#include "nvc_timer.h"
|
|
#include "nvc_memory.h"
|
|
|
|
// NOTE(review): presumably the thread-block size used for 1-D kernel
// launches -- not used in this header; confirm against the .cu sources.
#define BLOCK_1D 64
// NOTE(review): presumably the default number of ij entries moved per
// host-to-device transfer (see PairGPUNbor::init's ij_size) -- confirm.
#define IJ_SIZE 131072
|
|
|
|
class PairGPUNbor {
|
|
public:
|
|
PairGPUNbor() : _use_packing(false), allocated(false) {}
|
|
~PairGPUNbor() { clear(); }
|
|
|
|
/// Determine whether neighbor packing should be used
|
|
/** If true, twice as much memory is reserved to allow packing neighbors by
|
|
* atom for coalesced access after cutoff evaluation. This can be used
|
|
* for expensive potentials where it is more efficient to evaluate the
|
|
* cutoff separately from the potential in order to reduce thread divergence
|
|
* for expensive routines **/
|
|
void packing(const bool use_packing) { _use_packing=use_packing; }
|
|
|
|
/// Called once to allocate memory
|
|
void init(const int ij_size, const int max_atoms, const int max_nbors);
|
|
|
|
/// Free all memory on host and device
|
|
void clear();
|
|
|
|
/// Bytes per atom used on device
|
|
int bytes_per_atom(const int max_nbors) const;
|
|
/// Total host memory used by class
|
|
double host_memory_usage() const;
|
|
|
|
/// Reset neighbor data (first time or from a rebuild)
|
|
void reset(const int inum, int *ilist, const int *numj, cudaStream_t &s);
|
|
/// Add neighbor data from host
|
|
inline void add(const int num_ij, cudaStream_t &s)
|
|
{ host_ij.copy_to_device(ij.begin()+ij_total,num_ij,s); ij_total+=num_ij; }
|
|
|
|
/// Pack neighbors satisfying cutoff by atom for coalesced access
|
|
void pack_nbors(const int GX, const int BX, const int start,
|
|
const int inum, const int form_low, const int form_high);
|
|
|
|
|
|
// ------------------------------- Data -------------------------------
|
|
|
|
// Store IJ interactions on device
|
|
NVC_VecI ij;
|
|
// Buffer for moving ij data to GPU
|
|
NVC_HostI host_ij;
|
|
|
|
// --------------- Atom neighbors
|
|
// 3 x n
|
|
// - 1st row is i
|
|
// - 2nd row is numj (number of neighbors)
|
|
// - 3rd row is starting address in host_ij of neighbors
|
|
NVC_MatI dev_nbor;
|
|
|
|
// --------------- Timing Stuff
|
|
NVCTimer time_nbor;
|
|
|
|
int ij_total;
|
|
private:
|
|
bool allocated, _use_packing;
|
|
};
|
|
|
|
#endif
|