forked from lijiext/lammps
USER-INTEL add-ons from Mike
This commit is contained in:
parent
0b3f1b8a15
commit
1d4d2155a2
Binary file not shown.
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 20 KiB |
|
@ -29,8 +29,10 @@ Bond Styles: fene, harmonic :l
|
|||
Dihedral Styles: charmm, harmonic, opls :l
|
||||
Fixes: nve, npt, nvt, nvt/sllod :l
|
||||
Improper Styles: cvff, harmonic :l
|
||||
Pair Styles: buck/coul/cut, buck/coul/long, buck, eam, gayberne,
|
||||
charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, sw, tersoff :l
|
||||
Pair Styles: airebo, airebo/morse, buck/coul/cut, buck/coul/long,
|
||||
buck, eam, eam/alloy, eam/fs, gayberne, charmm/coul/charmm,
|
||||
charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, rebo,
|
||||
sw, tersoff :l
|
||||
K-Space Styles: pppm, pppm/disp :l
|
||||
:ule
|
||||
|
||||
|
|
|
@ -7,10 +7,13 @@
|
|||
:line
|
||||
|
||||
pair_style airebo command :h3
|
||||
pair_style airebo/intel command :h3
|
||||
pair_style airebo/omp command :h3
|
||||
pair_style airebo/morse command :h3
|
||||
pair_style airebo/morse/intel command :h3
|
||||
pair_style airebo/morse/omp command :h3
|
||||
pair_style rebo command :h3
|
||||
pair_style rebo/intel command :h3
|
||||
pair_style rebo/omp command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
:line
|
||||
|
||||
pair_style lj/charmm/coul/charmm command :h3
|
||||
pair_style lj/charmm/coul/charmm/intel command :h3
|
||||
pair_style lj/charmm/coul/charmm/omp command :h3
|
||||
pair_style lj/charmm/coul/charmm/implicit command :h3
|
||||
pair_style lj/charmm/coul/charmm/implicit/omp command :h3
|
||||
|
|
|
@ -14,6 +14,7 @@ pair_style eam/omp command :h3
|
|||
pair_style eam/opt command :h3
|
||||
pair_style eam/alloy command :h3
|
||||
pair_style eam/alloy/gpu command :h3
|
||||
pair_style eam/alloy/intel command :h3
|
||||
pair_style eam/alloy/kk command :h3
|
||||
pair_style eam/alloy/omp command :h3
|
||||
pair_style eam/alloy/opt command :h3
|
||||
|
@ -21,6 +22,7 @@ pair_style eam/cd command :h3
|
|||
pair_style eam/cd/omp command :h3
|
||||
pair_style eam/fs command :h3
|
||||
pair_style eam/fs/gpu command :h3
|
||||
pair_style eam/fs/intel command :h3
|
||||
pair_style eam/fs/kk command :h3
|
||||
pair_style eam/fs/omp command :h3
|
||||
pair_style eam/fs/opt command :h3
|
||||
|
|
|
@ -14,7 +14,7 @@ SHFLAGS = -fPIC
|
|||
DEPFLAGS = -M
|
||||
|
||||
LINK = mpiicpc
|
||||
LINKFLAGS = -g -qopenmp $(OPTFLAGS)
|
||||
LINKFLAGS = -qopenmp $(OPTFLAGS)
|
||||
LIB = -ltbbmalloc
|
||||
SIZE = size
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@ SHELL = /bin/sh
|
|||
# specify flags and libraries needed for your compiler
|
||||
|
||||
CC = mpicxx -cxx=icc
|
||||
OPTFLAGS = -xAVX -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
|
||||
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
|
||||
CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
|
||||
-fno-alias -ansi-alias -restrict $(OPTFLAGS)
|
||||
SHFLAGS = -fPIC
|
||||
|
|
|
@ -8,7 +8,7 @@ SHELL = /bin/sh
|
|||
|
||||
export OMPI_CXX = icc
|
||||
CC = mpicxx
|
||||
OPTFLAGS = -xAVX -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
|
||||
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
|
||||
CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
|
||||
-fno-alias -ansi-alias -restrict $(OPTFLAGS)
|
||||
SHFLAGS = -fPIC
|
||||
|
|
|
@ -46,6 +46,7 @@ action npair_intel.h
|
|||
action npair_intel.cpp
|
||||
action intel_simd.h pair_sw_intel.cpp
|
||||
action intel_intrinsics.h pair_tersoff_intel.cpp
|
||||
action intel_intrinsics_airebo.h pair_airebo_intel.cpp
|
||||
action verlet_lrt_intel.h pppm.cpp
|
||||
action verlet_lrt_intel.cpp pppm.cpp
|
||||
|
||||
|
|
|
@ -4,9 +4,9 @@
|
|||
--------------------------------
|
||||
|
||||
W. Michael Brown (Intel) michael.w.brown at intel.com
|
||||
Markus Hohnerbach (RWTH Aachen University)
|
||||
William McDoniel (RWTH Aachen University)
|
||||
Rodrigo Canales (RWTH Aachen University)
|
||||
Markus H<>hnerbach (RWTH Aachen University)
|
||||
Stan Moore (Sandia)
|
||||
Ahmed E. Ismail (RWTH Aachen University)
|
||||
Paolo Bientinesi (RWTH Aachen University)
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
# in.intel.sw - Silicon benchmark with Stillinger-Weber
|
||||
# in.intel.tersoff - Silicon benchmark with Tersoff
|
||||
# in.intel.water - Coarse-grain water benchmark using Stillinger-Weber
|
||||
# in.intel.airebo - Polyethelene benchmark with AIREBO
|
||||
#
|
||||
#############################################################################
|
||||
|
||||
|
@ -24,6 +25,7 @@
|
|||
# in.intel.sw - 132.4 161.9
|
||||
# in.intel.tersoff - 83.3 101.1
|
||||
# in.intel.water - 53.4 90.3
|
||||
# in.intel.airebo - 7.3 11.8
|
||||
#
|
||||
#############################################################################
|
||||
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
# AIREBO polyethelene benchmark
|
||||
|
||||
variable N index on # Newton Setting
|
||||
variable w index 10 # Warmup Timesteps
|
||||
variable t index 550 # Main Run Timesteps
|
||||
variable m index 1 # Main Run Timestep Multiplier
|
||||
variable n index 0 # Use NUMA Mapping for Multi-Node
|
||||
variable p index 0 # Use Power Measurement
|
||||
variable x index 4
|
||||
variable y index 2
|
||||
variable z index 2
|
||||
|
||||
variable xx equal 17*$x
|
||||
variable yy equal 16*$y
|
||||
variable zz equal 2*$z
|
||||
variable rr equal floor($t*$m)
|
||||
variable root getenv LMP_ROOT
|
||||
|
||||
newton $N
|
||||
if "$n > 0" then "processors * * * grid numa"
|
||||
|
||||
variable root getenv LMP_ROOT
|
||||
|
||||
units metal
|
||||
atom_style atomic
|
||||
|
||||
read_data ${root}/examples/airebo/data.airebo
|
||||
|
||||
replicate ${xx} ${yy} ${zz}
|
||||
|
||||
neighbor 0.5 bin
|
||||
neigh_modify delay 5 every 1
|
||||
|
||||
pair_style airebo 3.0 1 1
|
||||
pair_coeff * * ${root}/potentials/CH.airebo C H
|
||||
|
||||
velocity all create 300.0 761341
|
||||
|
||||
fix 1 all nve
|
||||
timestep 0.0005
|
||||
|
||||
thermo 50
|
||||
|
||||
if "$p > 0" then "run_style verlet/power"
|
||||
|
||||
if "$w > 0" then "run $w"
|
||||
run ${rr}
|
|
@ -5,7 +5,6 @@ variable w index 10 # Warmup Timesteps
|
|||
variable t index 3100 # Main Run Timesteps
|
||||
variable m index 1 # Main Run Timestep Multiplier
|
||||
variable n index 0 # Use NUMA Mapping for Multi-Node
|
||||
variable b index 3 # Neighbor binsize
|
||||
variable p index 0 # Use Power Measurement
|
||||
|
||||
variable x index 4
|
||||
|
|
|
@ -5,7 +5,6 @@ variable w index 10 # Warmup Timesteps
|
|||
variable t index 520 # Main Run Timesteps
|
||||
variable m index 1 # Main Run Timestep Multiplier
|
||||
variable n index 0 # Use NUMA Mapping for Multi-Node
|
||||
variable b index 3 # Neighbor binsize
|
||||
variable p index 0 # Use Power Measurement
|
||||
variable c index 0 # 1 to use collectives for PPPM
|
||||
variable d index 1 # 1 to use 'diff ad' for PPPM
|
||||
|
|
|
@ -30,6 +30,9 @@ IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
|
|||
_off_map_listlocal = 0;
|
||||
_ccachex = 0;
|
||||
_ncache_alloc = 0;
|
||||
_ncachetag = 0;
|
||||
_cutneighsq = 0;
|
||||
_cutneighghostsq = 0;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
_separate_buffers = 0;
|
||||
_off_f = 0;
|
||||
|
@ -447,12 +450,17 @@ void IntelBuffers<flt_t, acc_t>::free_ncache()
|
|||
flt_t *ncachez = _ncachez;
|
||||
int *ncachej = _ncachej;
|
||||
int *ncachejtype = _ncachejtype;
|
||||
int *ncachetag = _ncachetag;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_off_ncache) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(ncachex,ncachey,ncachez,ncachej:alloc_if(0) free_if(1)) \
|
||||
nocopy(ncachejtype:alloc_if(0) free_if(1))
|
||||
if (ncachetag) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(ncachetag:alloc_if(0) free_if(1))
|
||||
}
|
||||
}
|
||||
_off_ncache = 0;
|
||||
#endif
|
||||
|
@ -462,8 +470,10 @@ void IntelBuffers<flt_t, acc_t>::free_ncache()
|
|||
lmp->memory->destroy(ncachez);
|
||||
lmp->memory->destroy(ncachej);
|
||||
lmp->memory->destroy(ncachejtype);
|
||||
|
||||
if (ncachetag)
|
||||
lmp->memory->destroy(ncachetag);
|
||||
_ncache_alloc = 0;
|
||||
_ncachetag = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -480,7 +490,7 @@ void IntelBuffers<flt_t, acc_t>::grow_ncache(const int off_flag,
|
|||
const int vsize = _ncache_stride * nt;
|
||||
|
||||
if (_ncache_alloc) {
|
||||
if (vsize > _ncache_alloc)
|
||||
if (vsize > _ncache_alloc || (need_tag() && _ncachetag == 0))
|
||||
free_ncache();
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (off_flag && _off_ncache == 0)
|
||||
|
@ -495,6 +505,8 @@ void IntelBuffers<flt_t, acc_t>::grow_ncache(const int off_flag,
|
|||
lmp->memory->create(_ncachez, vsize, "_ncachez");
|
||||
lmp->memory->create(_ncachej, vsize, "_ncachej");
|
||||
lmp->memory->create(_ncachejtype, vsize, "_ncachejtype");
|
||||
if (need_tag())
|
||||
lmp->memory->create(_ncachetag, vsize, "_ncachetag");
|
||||
|
||||
_ncache_alloc = vsize;
|
||||
|
||||
|
@ -513,6 +525,14 @@ void IntelBuffers<flt_t, acc_t>::grow_ncache(const int off_flag,
|
|||
nocopy(ncachez,ncachej:length(vsize) alloc_if(1) free_if(0)) \
|
||||
nocopy(ncachejtype:length(vsize) alloc_if(1) free_if(0))
|
||||
}
|
||||
int tsize = vsize;
|
||||
if (!need_tag()) {
|
||||
tsize = 16;
|
||||
lmp->memory->create(_ncachetag, tsize, "_ncachetag");
|
||||
}
|
||||
int *ncachetag = _ncachetag;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(ncachetag:length(tsize) alloc_if(1) free_if(0))
|
||||
_off_ncache = 1;
|
||||
}
|
||||
#endif
|
||||
|
@ -548,7 +568,8 @@ void IntelBuffers<flt_t, acc_t>::fdotr_reduce(const int nall,
|
|||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes)
|
||||
void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes,
|
||||
const int use_ghost_cut)
|
||||
{
|
||||
if (ntypes != _ntypes) {
|
||||
if (_ntypes > 0) {
|
||||
|
@ -558,16 +579,34 @@ void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes)
|
|||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(cutneighsqo:alloc_if(0) free_if(1))
|
||||
}
|
||||
flt_t * cutneighghostsqo;
|
||||
if (_cutneighghostsq && _off_threads > 0 && cutneighghostsqo != 0) {
|
||||
cutneighghostsqo = _cutneighghostsq[0];
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(cutneighghostsqo:alloc_if(0) free_if(1))
|
||||
}
|
||||
#endif
|
||||
lmp->memory->destroy(_cutneighsq);
|
||||
if (_cutneighghostsq != 0) lmp->memory->destroy(_cutneighghostsq);
|
||||
}
|
||||
if (ntypes > 0) {
|
||||
lmp->memory->create(_cutneighsq, ntypes, ntypes, "_cutneighsq");
|
||||
if (use_ghost_cut)
|
||||
lmp->memory->create(_cutneighghostsq, ntypes, ntypes,
|
||||
"_cutneighghostsq");
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
flt_t * cutneighsqo = _cutneighsq[0];
|
||||
const int ntypes2 = ntypes * ntypes;
|
||||
if (_off_threads > 0 && cutneighsqo != NULL) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(cutneighsqo:length(ntypes * ntypes) alloc_if(1) free_if(0))
|
||||
nocopy(cutneighsqo:length(ntypes2) alloc_if(1) free_if(0))
|
||||
}
|
||||
if (use_ghost_cut) {
|
||||
flt_t * cutneighghostsqo = _cutneighghostsq[0];
|
||||
if (_off_threads > 0 && cutneighghostsqo != NULL) {
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
nocopy(cutneighghostsqo:length(ntypes2) alloc_if(1) free_if(0))
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -109,12 +109,14 @@ class IntelBuffers {
|
|||
|
||||
void free_ncache();
|
||||
void grow_ncache(const int off_flag, const int nthreads);
|
||||
void grow_ncachetag(const int off_flag, const int nthreads);
|
||||
inline int ncache_stride() { return _ncache_stride; }
|
||||
inline flt_t * get_ncachex() { return _ncachex; }
|
||||
inline flt_t * get_ncachey() { return _ncachey; }
|
||||
inline flt_t * get_ncachez() { return _ncachez; }
|
||||
inline int * get_ncachej() { return _ncachej; }
|
||||
inline int * get_ncachejtype() { return _ncachejtype; }
|
||||
inline int * get_ncachetag() { return _ncachetag; }
|
||||
|
||||
inline int get_max_nbors() {
|
||||
int mn = lmp->neighbor->oneatom * sizeof(int) /
|
||||
|
@ -131,7 +133,7 @@ class IntelBuffers {
|
|||
_grow_nbor_list(list, nlocal, nthreads, offload_end, pack_width);
|
||||
}
|
||||
|
||||
void set_ntypes(const int ntypes);
|
||||
void set_ntypes(const int ntypes, const int use_ghost_cut = 0);
|
||||
|
||||
inline int * firstneigh(const NeighList *list) { return _list_alloc; }
|
||||
inline int * cnumneigh(const NeighList *list) { return _cnumneigh; }
|
||||
|
@ -162,6 +164,7 @@ class IntelBuffers {
|
|||
inline void zero_ev()
|
||||
{ for (int i = 0; i < 8; i++) _ev_global[i] = _ev_global_host[i] = 0.0; }
|
||||
inline flt_t ** get_cutneighsq() { return _cutneighsq; }
|
||||
inline flt_t ** get_cutneighghostsq() { return _cutneighghostsq; }
|
||||
inline int get_off_threads() { return _off_threads; }
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
inline void set_off_params(const int n, const int cop,
|
||||
|
@ -274,13 +277,10 @@ class IntelBuffers {
|
|||
used_ghost * sizeof(flt_t));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
inline int need_tag() { return _need_tag; }
|
||||
inline void need_tag(const int nt) { _need_tag = nt; }
|
||||
#else
|
||||
inline int need_tag() { return 0; }
|
||||
inline void need_tag(const int nt) { }
|
||||
#endif
|
||||
|
||||
double memory_usage(const int nthreads);
|
||||
|
||||
|
@ -298,7 +298,7 @@ class IntelBuffers {
|
|||
int _list_alloc_atoms;
|
||||
int *_list_alloc, *_cnumneigh, *_atombin, *_binpacked;
|
||||
|
||||
flt_t **_cutneighsq;
|
||||
flt_t **_cutneighsq, **_cutneighghostsq;
|
||||
int _ntypes;
|
||||
|
||||
int _ccache_stride;
|
||||
|
@ -307,7 +307,10 @@ class IntelBuffers {
|
|||
|
||||
int _ncache_stride, _ncache_alloc;
|
||||
flt_t *_ncachex, *_ncachey, *_ncachez;
|
||||
int *_ncachej, *_ncachejtype;
|
||||
int *_ncachej, *_ncachejtype, *_ncachetag;
|
||||
|
||||
int _need_tag, _host_nmax;
|
||||
|
||||
#ifdef LMP_USE_AVXCD
|
||||
int _ccache_stride3;
|
||||
acc_t * _ccachef;
|
||||
|
@ -324,7 +327,6 @@ class IntelBuffers {
|
|||
int *_off_map_special, *_off_map_nspecial, *_off_map_tag;
|
||||
int *_off_map_numneigh;
|
||||
bool _off_list_alloc;
|
||||
int _need_tag, _host_nmax;
|
||||
#endif
|
||||
|
||||
int _buf_size, _buf_local_size;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -211,6 +211,8 @@ void NBinIntel::bin_atoms(IntelBuffers<flt_t,acc_t> * buffers) {
|
|||
for (i = nall-1; i >= nlocal; i--) {
|
||||
if (mask[i] & bitmask) {
|
||||
ibin = coord2bin(atom->x[i]);
|
||||
// Only necessary to store when neighboring ghost
|
||||
atombin[i] = ibin;
|
||||
bins[i] = binhead[ibin];
|
||||
binhead[ibin] = i;
|
||||
}
|
||||
|
@ -222,14 +224,10 @@ void NBinIntel::bin_atoms(IntelBuffers<flt_t,acc_t> * buffers) {
|
|||
binhead[ibin] = i;
|
||||
}
|
||||
} else {
|
||||
for (i = nall-1; i >= nlocal; i--) {
|
||||
for (i = nall-1; i >= 0; i--) {
|
||||
ibin = coord2bin(atom->x[i]);
|
||||
bins[i] = binhead[ibin];
|
||||
binhead[ibin] = i;
|
||||
}
|
||||
for (i = nlocal-1; i >= 0; i--) {
|
||||
ibin = coord2bin(atom->x[i]);
|
||||
atombin[i]=ibin;
|
||||
// Only necessary to store for ghost when neighboring ghost
|
||||
atombin[i] = ibin;
|
||||
bins[i] = binhead[ibin];
|
||||
binhead[ibin] = i;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,593 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "npair_full_bin_ghost_intel.h"
|
||||
#include "neighbor.h"
|
||||
#include "nstencil.h"
|
||||
#include "neigh_list.h"
|
||||
#include "atom.h"
|
||||
#include "atom_vec.h"
|
||||
#include "molecule.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
NPairFullBinGhostIntel::NPairFullBinGhostIntel(LAMMPS *lmp) : NPairIntel(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
binned neighbor list construction for all neighbors
|
||||
include neighbors of ghost atoms, but no "special neighbors" for ghosts
|
||||
every neighbor pair appears in list of both atoms i and j
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void NPairFullBinGhostIntel::build(NeighList *list)
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_fix->offload_noghost())
|
||||
error->all(FLERR,
|
||||
"The 'ghost no' option cannot be used with this USER-INTEL pair style.");
|
||||
#endif
|
||||
|
||||
if (nstencil > INTEL_MAX_STENCIL_CHECK)
|
||||
error->all(FLERR, "Too many neighbor bins for USER-INTEL package.");
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (exclude)
|
||||
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
|
||||
#endif
|
||||
|
||||
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
fbi(list, _fix->get_mixed_buffers());
|
||||
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
fbi(list, _fix->get_double_buffers());
|
||||
else
|
||||
fbi(list, _fix->get_single_buffers());
|
||||
|
||||
_fix->stop_watch(TIME_HOST_NEIGHBOR);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class flt_t, class acc_t>
|
||||
void NPairFullBinGhostIntel::fbi(NeighList * list,
|
||||
IntelBuffers<flt_t,acc_t> * buffers)
|
||||
{
|
||||
const int nlocal = atom->nlocal;
|
||||
const int nall = atom->nlocal + atom->nghost;
|
||||
list->inum = atom->nlocal;
|
||||
list->gnum = atom->nghost;
|
||||
|
||||
int host_start = _fix->host_start_neighbor();
|
||||
const int off_end = _fix->offload_end_neighbor();
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (off_end) grow_stencil();
|
||||
if (_fix->full_host_list()) host_start = 0;
|
||||
int offload_noghost = _fix->offload_noghost();
|
||||
#endif
|
||||
|
||||
// only uses offload_end_neighbor to check whether we are doing offloading
|
||||
// at all, no need to correct this later
|
||||
buffers->grow_list(list, nall, comm->nthreads, off_end,
|
||||
_fix->nbor_pack_width());
|
||||
|
||||
int need_ic = 0;
|
||||
if (atom->molecular)
|
||||
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
|
||||
neighbor->cutneighmax);
|
||||
|
||||
if (need_ic) {
|
||||
fbi<flt_t,acc_t,1>(1, list, buffers, 0, off_end);
|
||||
fbi<flt_t,acc_t,1>(0, list, buffers, host_start, nlocal);
|
||||
} else {
|
||||
fbi<flt_t,acc_t,0>(1, list, buffers, 0, off_end);
|
||||
fbi<flt_t,acc_t,0>(0, list, buffers, host_start, nlocal);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class flt_t, class acc_t, int need_ic>
|
||||
void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
|
||||
IntelBuffers<flt_t,acc_t> * buffers,
|
||||
const int pstart, const int pend) {
|
||||
if (pend-pstart == 0) return;
|
||||
|
||||
const int nall = atom->nlocal + atom->nghost;
|
||||
int pad = 1;
|
||||
int nall_t = nall;
|
||||
const int aend = nall;
|
||||
|
||||
const int pack_width = _fix->nbor_pack_width();
|
||||
const ATOM_T * _noalias const x = buffers->get_x();
|
||||
int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
const int e_nall = nall_t;
|
||||
|
||||
const int molecular = atom->molecular;
|
||||
int *ns = NULL;
|
||||
tagint *s = NULL;
|
||||
int tag_size = 0, special_size;
|
||||
if (buffers->need_tag()) tag_size = e_nall;
|
||||
if (molecular) {
|
||||
s = atom->special[0];
|
||||
ns = atom->nspecial[0];
|
||||
special_size = aend;
|
||||
} else {
|
||||
s = &buffers->_special_holder;
|
||||
ns = &buffers->_nspecial_holder;
|
||||
special_size = 0;
|
||||
}
|
||||
const tagint * _noalias const special = s;
|
||||
const int * _noalias const nspecial = ns;
|
||||
const int maxspecial = atom->maxspecial;
|
||||
const tagint * _noalias const tag = atom->tag;
|
||||
|
||||
int * _noalias const ilist = list->ilist;
|
||||
int * _noalias numneigh = list->numneigh;
|
||||
int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int nstencil = this->nstencil;
|
||||
const int * _noalias const stencil = this->stencil;
|
||||
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
|
||||
const flt_t * _noalias const cutneighghostsq =
|
||||
buffers->get_cutneighghostsq()[0];
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int nlocal = atom->nlocal;
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
int * const mask = atom->mask;
|
||||
tagint * const molecule = atom->molecule;
|
||||
#endif
|
||||
|
||||
int *molindex = atom->molindex;
|
||||
int *molatom = atom->molatom;
|
||||
Molecule **onemols = atom->avec->onemols;
|
||||
int moltemplate;
|
||||
if (molecular == 2) moltemplate = 1;
|
||||
else moltemplate = 0;
|
||||
if (moltemplate)
|
||||
error->all(FLERR,
|
||||
"Can't use moltemplate with npair style full/bin/ghost/intel.");
|
||||
|
||||
int tnum;
|
||||
int *overflow;
|
||||
double *timer_compute;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload) {
|
||||
timer_compute = _fix->off_watch_neighbor();
|
||||
tnum = buffers->get_off_threads();
|
||||
overflow = _fix->get_off_overflow_flag();
|
||||
_fix->stop_watch(TIME_HOST_NEIGHBOR);
|
||||
_fix->start_watch(TIME_OFFLOAD_LATENCY);
|
||||
} else
|
||||
#endif
|
||||
{
|
||||
tnum = comm->nthreads;
|
||||
overflow = _fix->get_overflow_flag();
|
||||
}
|
||||
const int nthreads = tnum;
|
||||
const int maxnbors = buffers->get_max_nbors();
|
||||
int * _noalias const atombin = buffers->get_atombin();
|
||||
const int * _noalias const binpacked = buffers->get_binpacked();
|
||||
|
||||
const int xperiodic = domain->xperiodic;
|
||||
const int yperiodic = domain->yperiodic;
|
||||
const int zperiodic = domain->zperiodic;
|
||||
const flt_t xprd_half = domain->xprd_half;
|
||||
const flt_t yprd_half = domain->yprd_half;
|
||||
const flt_t zprd_half = domain->zprd_half;
|
||||
|
||||
flt_t * _noalias const ncachex = buffers->get_ncachex();
|
||||
flt_t * _noalias const ncachey = buffers->get_ncachey();
|
||||
flt_t * _noalias const ncachez = buffers->get_ncachez();
|
||||
int * _noalias const ncachej = buffers->get_ncachej();
|
||||
int * _noalias const ncachejtype = buffers->get_ncachejtype();
|
||||
int * _noalias const ncachetag = buffers->get_ncachetag();
|
||||
const int ncache_stride = buffers->ncache_stride();
|
||||
|
||||
const int mbinx = this->mbinx;
|
||||
const int mbiny = this->mbiny;
|
||||
const int mbinz = this->mbinz;
|
||||
const int * const stencilxyz = &this->stencilxyz[0][0];
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
const int * _noalias const binhead = this->binhead;
|
||||
const int * _noalias const bins = this->bins;
|
||||
const int cop = _fix->coprocessor_number();
|
||||
const int separate_buffers = _fix->separate_buffers();
|
||||
#pragma offload target(mic:cop) if(offload) \
|
||||
in(x:length(e_nall+1) alloc_if(0) free_if(0)) \
|
||||
in(tag:length(tag_size) alloc_if(0) free_if(0)) \
|
||||
in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \
|
||||
in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \
|
||||
in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \
|
||||
in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \
|
||||
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cutneighghostsq:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ilist:length(0) alloc_if(0) free_if(0)) \
|
||||
in(atombin:length(aend) alloc_if(0) free_if(0)) \
|
||||
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
|
||||
in(ncachex,ncachey,ncachez,ncachej:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ncachejtype,ncachetag:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \
|
||||
in(separate_buffers,aend,nlocal,molecular,ntypes,mbinx,mbiny) \
|
||||
in(mbinz,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \
|
||||
in(stencilxyz:length(3*nstencil)) \
|
||||
out(overflow:length(5) alloc_if(0) free_if(0)) \
|
||||
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
|
||||
signal(tag)
|
||||
#endif
|
||||
{
|
||||
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
|
||||
*timer_compute = MIC_Wtime();
|
||||
#endif
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
overflow[LMP_LOCAL_MIN] = 0;
|
||||
overflow[LMP_LOCAL_MAX] = aend - 1;
|
||||
overflow[LMP_GHOST_MIN] = e_nall;
|
||||
overflow[LMP_GHOST_MAX] = -1;
|
||||
#endif
|
||||
|
||||
int nstencilp = 0;
|
||||
int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL];
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
binstart[nstencilp] = stencil[k];
|
||||
int end = stencil[k] + 1;
|
||||
for (int kk = k + 1; kk < nstencil; kk++) {
|
||||
if (stencil[kk-1]+1 == stencil[kk]) {
|
||||
end++;
|
||||
k++;
|
||||
} else break;
|
||||
}
|
||||
binend[nstencilp] = end;
|
||||
nstencilp++;
|
||||
}
|
||||
|
||||
const int mbinyx = mbiny * mbinx;
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel
|
||||
#endif
|
||||
{
|
||||
const int num = aend;
|
||||
int tid, ifrom, ito;
|
||||
|
||||
const double balance_factor = 2.0;
|
||||
const double ibalance_factor = 1.0 / balance_factor;
|
||||
const int gnum = num - nlocal;
|
||||
const int wlocal = static_cast<int>(ceil(balance_factor * nlocal));
|
||||
const int snum = wlocal + gnum;
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, snum, nthreads);
|
||||
if (ifrom < wlocal) ifrom = static_cast<int>(ibalance_factor * ifrom);
|
||||
else ifrom -= wlocal - nlocal;
|
||||
if (ito < wlocal) ito = static_cast<int>(ibalance_factor * ito);
|
||||
else ito -= wlocal - nlocal;
|
||||
|
||||
int e_ito = ito;
|
||||
const int list_size = (e_ito + tid * 2 + 2) * maxnbors;
|
||||
|
||||
int which;
|
||||
|
||||
int pack_offset = maxnbors;
|
||||
int ct = (ifrom + tid * 2) * maxnbors;
|
||||
int *neighptr = firstneigh + ct;
|
||||
const int obound = pack_offset + maxnbors * 2;
|
||||
|
||||
const int toffs = tid * ncache_stride;
|
||||
flt_t * _noalias const tx = ncachex + toffs;
|
||||
flt_t * _noalias const ty = ncachey + toffs;
|
||||
flt_t * _noalias const tz = ncachez + toffs;
|
||||
int * _noalias const tj = ncachej + toffs;
|
||||
int * _noalias const tjtype = ncachejtype + toffs;
|
||||
int * _noalias const ttag = ncachetag + toffs;
|
||||
|
||||
// loop over all atoms in other bins in stencil, store every pair
|
||||
int istart, icount, ncount, oldbin = -9999999, lane, max_chunk;
|
||||
for (int i = ifrom; i < ito; i++) {
|
||||
const flt_t xtmp = x[i].x;
|
||||
const flt_t ytmp = x[i].y;
|
||||
const flt_t ztmp = x[i].z;
|
||||
const int itype = x[i].w;
|
||||
const tagint itag = tag[i];
|
||||
const int ioffset = ntypes * itype;
|
||||
|
||||
const int ibin = atombin[i];
|
||||
if (ibin != oldbin) {
|
||||
oldbin = ibin;
|
||||
ncount = 0;
|
||||
if (i < nlocal) {
|
||||
for (int k = 0; k < nstencilp; k++) {
|
||||
const int bstart = binhead[ibin + binstart[k]];
|
||||
const int bend = binhead[ibin + binend[k]];
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
#endif
|
||||
for (int jj = bstart; jj < bend; jj++)
|
||||
tj[ncount++] = binpacked[jj];
|
||||
}
|
||||
} else {
|
||||
const int zbin = ibin / mbinyx;
|
||||
const int zrem = ibin % mbinyx;
|
||||
const int ybin = zrem / mbinx;
|
||||
const int xbin = zrem % mbinx;
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
const int xbin2 = xbin + stencilxyz[3 * k + 0];
|
||||
const int ybin2 = ybin + stencilxyz[3 * k + 1];
|
||||
const int zbin2 = zbin + stencilxyz[3 * k + 2];
|
||||
if (xbin2 < 0 || xbin2 >= mbinx ||
|
||||
ybin2 < 0 || ybin2 >= mbiny ||
|
||||
zbin2 < 0 || zbin2 >= mbinz) continue;
|
||||
|
||||
const int bstart = binhead[ibin + stencil[k]];
|
||||
const int bend = binhead[ibin + stencil[k] + 1];
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
#endif
|
||||
for (int jj = bstart; jj < bend; jj++)
|
||||
tj[ncount++] = binpacked[jj];
|
||||
}
|
||||
} // if i < nlocal
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
#endif
|
||||
for (int u = 0; u < ncount; u++) {
|
||||
const int j = tj[u];
|
||||
tx[u] = x[j].x;
|
||||
ty[u] = x[j].y;
|
||||
tz[u] = x[j].z;
|
||||
tjtype[u] = x[j].w;
|
||||
ttag[u] = tag[j];
|
||||
}
|
||||
} // if ibin != oldbin
|
||||
|
||||
// ---------------------- Loop over other bins
|
||||
|
||||
int n = maxnbors;
|
||||
int n2 = n * 2;
|
||||
int *neighptr2 = neighptr;
|
||||
const flt_t * _noalias cutsq;
|
||||
if (i < nlocal) cutsq = cutneighsq;
|
||||
else cutsq = cutneighghostsq;
|
||||
|
||||
const int icp = i;
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int u = 0; u < ncount; u++) {
|
||||
int addme = 1;
|
||||
int j = tj[u];
|
||||
|
||||
if (i == j) addme = 0;
|
||||
|
||||
// Cutoff Check
|
||||
const flt_t delx = xtmp - tx[u];
|
||||
const flt_t dely = ytmp - ty[u];
|
||||
const flt_t delz = ztmp - tz[u];
|
||||
const int jtype = tjtype[u];
|
||||
const int jtag = ttag[u];
|
||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||
if (rsq > cutsq[ioffset + jtype]) addme = 0;
|
||||
|
||||
if (need_ic && icp < nlocal) {
|
||||
int no_special;
|
||||
ominimum_image_check(no_special, delx, dely, delz);
|
||||
if (no_special)
|
||||
j = -j - 1;
|
||||
}
|
||||
|
||||
int flist = 0;
|
||||
if (itag > jtag) {
|
||||
if (((itag+jtag) & 1) == 0) flist = 1;
|
||||
} else if (itag < jtag) {
|
||||
if (((itag+jtag) & 1) == 1) flist = 1;
|
||||
} else {
|
||||
if (tz[u] < ztmp) flist = 1;
|
||||
else if (tz[u] == ztmp && ty[u] < ytmp) flist = 1;
|
||||
else if (tz[u] == ztmp && ty[u] == ytmp && tx[u] < xtmp)
|
||||
flist = 1;
|
||||
}
|
||||
if (addme) {
|
||||
if (flist)
|
||||
neighptr2[n2++] = j;
|
||||
else
|
||||
neighptr[n++] = j;
|
||||
}
|
||||
} // for u
|
||||
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
if (exclude) {
|
||||
int alln = n;
|
||||
n = maxnbors;
|
||||
for (int u = pack_offset; u < alln; u++) {
|
||||
const int j = neighptr[u];
|
||||
int pj = j;
|
||||
if (need_ic)
|
||||
if (pj < 0) pj = -j - 1;
|
||||
const int jtype = x[pj].w;
|
||||
if (exclusion(i,pj,itype,jtype,mask,molecule)) continue;
|
||||
neighptr[n++] = j;
|
||||
}
|
||||
alln = n2;
|
||||
n2 = maxnbors * 2;
|
||||
for (int u = n2; u < alln; u++) {
|
||||
const int j = neighptr[u];
|
||||
int pj = j;
|
||||
if (need_ic)
|
||||
if (pj < 0) pj = -j - 1;
|
||||
const int jtype = x[pj].w;
|
||||
if (exclusion(i,pj,itype,jtype,mask,molecule)) continue;
|
||||
neighptr[n2++] = j;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
int ns = n - maxnbors;
|
||||
int alln = n;
|
||||
atombin[i] = ns;
|
||||
n = 0;
|
||||
for (int u = maxnbors; u < alln; u++)
|
||||
neighptr[n++] = neighptr[u];
|
||||
ns += n2 - maxnbors * 2;
|
||||
for (int u = maxnbors * 2; u < n2; u++)
|
||||
neighptr[n++] = neighptr[u];
|
||||
if (ns > maxnbors) *overflow = 1;
|
||||
|
||||
ilist[i] = i;
|
||||
cnumneigh[i] = ct;
|
||||
numneigh[i] = ns;
|
||||
|
||||
ct += ns;
|
||||
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
|
||||
const int edge = ct & (alignb - 1);
|
||||
if (edge) ct += alignb - edge;
|
||||
neighptr = firstneigh + ct;
|
||||
if (ct + obound > list_size) {
|
||||
if (i < ito - 1) {
|
||||
*overflow = 1;
|
||||
ct = (ifrom + tid * 2) * maxnbors;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (*overflow == 1)
|
||||
for (int i = ifrom; i < ito; i++)
|
||||
numneigh[i] = 0;
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int ghost_offset = 0, nall_offset = e_nall;
|
||||
if (separate_buffers) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
#if __INTEL_COMPILER+0 > 1499
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = jlist[jj];
|
||||
if (need_ic && j < 0) j = -j - 1;
|
||||
}
|
||||
}
|
||||
|
||||
overflow[LMP_LOCAL_MIN] = 0;
|
||||
overflow[LMP_LOCAL_MAX] = nlocal - 1;
|
||||
overflow[LMP_GHOST_MIN] = nlocal;
|
||||
overflow[LMP_GHOST_MAX] = e_nall - 1;
|
||||
|
||||
int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN];
|
||||
if (nghost < 0) nghost = 0;
|
||||
if (offload) {
|
||||
ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1;
|
||||
nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost;
|
||||
} else {
|
||||
ghost_offset = overflow[LMP_GHOST_MIN] - nlocal;
|
||||
nall_offset = nlocal + nghost;
|
||||
}
|
||||
} // if separate_buffers
|
||||
#endif
|
||||
|
||||
if (molecular) {
|
||||
int ito_m = ito;
|
||||
if (ito >= nlocal) ito_m = nlocal;
|
||||
for (int i = ifrom; i < ito_m; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int j = jlist[jj];
|
||||
if (need_ic && j < 0) {
|
||||
which = 0;
|
||||
jlist[jj] = -j - 1;
|
||||
} else
|
||||
ofind_special(which, special, nspecial, i, tag[j]);
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (j >= nlocal) {
|
||||
if (j == e_nall)
|
||||
jlist[jj] = nall_offset;
|
||||
else if (which)
|
||||
jlist[jj] = (j-ghost_offset) ^ (which << SBBITS);
|
||||
else jlist[jj]-=ghost_offset;
|
||||
} else
|
||||
#endif
|
||||
if (which) jlist[jj] = j ^ (which << SBBITS);
|
||||
}
|
||||
} // for i
|
||||
} // if molecular
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
else if (separate_buffers) {
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
int jj = 0;
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
if (jlist[jj] >= nlocal) {
|
||||
if (jlist[jj] == e_nall) jlist[jj] = nall_offset;
|
||||
else jlist[jj] -= ghost_offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} // end omp
|
||||
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
|
||||
*timer_compute = MIC_Wtime() - *timer_compute;
|
||||
#endif
|
||||
} // end offload
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (offload) {
|
||||
_fix->stop_watch(TIME_OFFLOAD_LATENCY);
|
||||
_fix->start_watch(TIME_HOST_NEIGHBOR);
|
||||
for (int n = 0; n < aend; n++) {
|
||||
ilist[n] = n;
|
||||
numneigh[n] = 0;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
if (separate_buffers) {
|
||||
_fix->start_watch(TIME_PACK);
|
||||
_fix->set_neighbor_host_sizes();
|
||||
buffers->pack_sep_from_single(_fix->host_min_local(),
|
||||
_fix->host_used_local(),
|
||||
_fix->host_min_ghost(),
|
||||
_fix->host_used_ghost());
|
||||
_fix->stop_watch(TIME_PACK);
|
||||
}
|
||||
}
|
||||
#else
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
for (int i = 0; i < aend; i++)
|
||||
list->firstneigh[i] = firstneigh + cnumneigh[i];
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,55 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef NPAIR_CLASS
|
||||
|
||||
NPairStyle(full/bin/ghost/intel,
|
||||
NPairFullBinGhostIntel,
|
||||
NP_FULL | NP_BIN | NP_GHOST | NP_NEWTON | NP_NEWTOFF |
|
||||
NP_ORTHO | NP_TRI | NP_INTEL)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_NPAIR_FULL_BIN_GHOST_INTEL_H
|
||||
#define LMP_NPAIR_FULL_BIN_GHOST_INTEL_H
|
||||
|
||||
#include "npair_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class NPairFullBinGhostIntel : public NPairIntel {
|
||||
public:
|
||||
NPairFullBinGhostIntel(class LAMMPS *);
|
||||
~NPairFullBinGhostIntel() {}
|
||||
void build(class NeighList *);
|
||||
private:
|
||||
template<class flt_t, class acc_t>
|
||||
void fbi(NeighList * list, IntelBuffers<flt_t,acc_t> * buffers);
|
||||
template<class flt_t, class acc_t, int need_ic>
|
||||
void fbi(const int offload, NeighList * list,
|
||||
IntelBuffers<flt_t,acc_t> * buffers,
|
||||
const int astart, const int aend);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
|
@ -143,6 +143,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
|||
flt_t * _noalias const ncachez = buffers->get_ncachez();
|
||||
int * _noalias const ncachej = buffers->get_ncachej();
|
||||
int * _noalias const ncachejtype = buffers->get_ncachejtype();
|
||||
int * _noalias const ncachetag = buffers->get_ncachetag();
|
||||
const int ncache_stride = buffers->ncache_stride();
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
|
@ -165,7 +166,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
|||
in(atombin:length(aend) alloc_if(0) free_if(0)) \
|
||||
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
|
||||
in(ncachex,ncachey,ncachez,ncachej:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ncachejtype:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ncachejtype,ncachetag:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \
|
||||
in(pad_width,offload_end,separate_buffers,astart,aend,nlocal,molecular) \
|
||||
in(ntypes,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \
|
||||
|
@ -222,7 +223,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
|||
ito += astart;
|
||||
int e_ito = ito;
|
||||
if (THREE && ito == num) {
|
||||
int imod = ito % pack_width;
|
||||
int imod = ito & (pack_width - 1);
|
||||
if (imod) e_ito += pack_width - imod;
|
||||
}
|
||||
const int list_size = (e_ito + tid * 2 + 2) * maxnbors;
|
||||
|
@ -241,6 +242,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
|||
flt_t * _noalias const tz = ncachez + toffs;
|
||||
int * _noalias const tj = ncachej + toffs;
|
||||
int * _noalias const tjtype = ncachejtype + toffs;
|
||||
int * _noalias const ttag = ncachetag + toffs;
|
||||
|
||||
flt_t * _noalias itx;
|
||||
flt_t * _noalias ity;
|
||||
|
@ -287,13 +289,14 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
|||
ty[u] = x[j].y;
|
||||
tz[u] = x[j].z;
|
||||
tjtype[u] = x[j].w;
|
||||
if (THREE) ttag[u] = tag[j];
|
||||
}
|
||||
|
||||
if (FULL == 0 || TRI == 1) {
|
||||
icount = 0;
|
||||
istart = ncount;
|
||||
const int alignb = INTEL_DATA_ALIGN / sizeof(int);
|
||||
int nedge = istart % alignb;
|
||||
int nedge = istart & (alignb - 1);
|
||||
if (nedge) istart + (alignb - nedge);
|
||||
itx = tx + istart;
|
||||
ity = ty + istart;
|
||||
|
@ -343,7 +346,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
|||
|
||||
// i bin (half) check and offload ghost check
|
||||
if (j < nlocal) {
|
||||
const int ijmod = (i + j) % 2;
|
||||
const int ijmod = (i + j) & 1;
|
||||
if (i > j) {
|
||||
if (ijmod == 0) addme = 0;
|
||||
} else if (i < j) {
|
||||
|
@ -424,8 +427,6 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
|||
}
|
||||
#endif
|
||||
|
||||
int pj;
|
||||
if (THREE) pj = j;
|
||||
if (need_ic) {
|
||||
int no_special;
|
||||
ominimum_image_check(no_special, delx, dely, delz);
|
||||
|
@ -434,12 +435,12 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
|||
}
|
||||
|
||||
if (THREE) {
|
||||
const int jtag = tag[pj];
|
||||
const int jtag = ttag[u];
|
||||
int flist = 0;
|
||||
if (itag > jtag) {
|
||||
if ((itag+jtag) % 2 == 0) flist = 1;
|
||||
if (((itag+jtag) & 1) == 0) flist = 1;
|
||||
} else if (itag < jtag) {
|
||||
if ((itag+jtag) % 2 == 1) flist = 1;
|
||||
if (((itag+jtag) & 1) == 1) flist = 1;
|
||||
} else {
|
||||
if (tz[u] < ztmp) flist = 1;
|
||||
else if (tz[u] == ztmp && ty[u] < ytmp) flist = 1;
|
||||
|
@ -512,7 +513,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
|||
cnumneigh[i] += lane;
|
||||
numneigh[i] = ns;
|
||||
} else {
|
||||
int edge = (n % pad_width);
|
||||
int edge = n & (pad_width - 1);
|
||||
if (edge) {
|
||||
const int pad_end = n + (pad_width - edge);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
|
@ -532,7 +533,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
|||
if (lane == pack_width) {
|
||||
ct += max_chunk * pack_width;
|
||||
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
|
||||
const int edge = (ct % alignb);
|
||||
const int edge = ct & (alignb - 1);
|
||||
if (edge) ct += alignb - edge;
|
||||
neighptr = firstneigh + ct;
|
||||
max_chunk = 0;
|
||||
|
@ -548,7 +549,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
|
|||
} else {
|
||||
ct += n;
|
||||
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
|
||||
const int edge = (ct % alignb);
|
||||
const int edge = ct & (alignb - 1);
|
||||
if (edge) ct += alignb - edge;
|
||||
neighptr = firstneigh + ct;
|
||||
if (ct + obound > list_size) {
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,110 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Markus Hohnerbach (RWTH)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(airebo/intel,PairAIREBOIntel)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_AIREBO_INTEL_H
|
||||
#define LMP_PAIR_AIREBO_INTEL_H
|
||||
|
||||
#include "pair.h"
|
||||
#include "fix_intel.h"
|
||||
#include "pair_airebo.h"
|
||||
//#include "airebo_common.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class flt_t, class acc_t>
|
||||
struct PairAIREBOIntelParam;
|
||||
|
||||
class PairAIREBOIntel : public PairAIREBO {
|
||||
public:
|
||||
PairAIREBOIntel(class LAMMPS *);
|
||||
virtual ~PairAIREBOIntel();
|
||||
virtual void compute(int, int);
|
||||
virtual void init_style();
|
||||
protected:
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers);
|
||||
|
||||
template <int EVFLAG, int EFLAG, class flt_t, class acc_t>
|
||||
void eval(const int offload, const int vflag,
|
||||
IntelBuffers<flt_t,acc_t> * buffers,
|
||||
const int astart, const int aend);
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void pack_force_const(IntelBuffers<flt_t,acc_t> * buffers);
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
PairAIREBOIntelParam<flt_t,acc_t> get_param();
|
||||
|
||||
FixIntel * fix;
|
||||
int _cop;
|
||||
|
||||
int * REBO_cnumneigh;
|
||||
int * REBO_num_skin;
|
||||
int * REBO_list_data;
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Illegal ... command
|
||||
|
||||
Self-explanatory. Check the input script syntax and compare to the
|
||||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: Incorrect args for pair coefficients
|
||||
|
||||
Self-explanatory. Check the input script or data file.
|
||||
|
||||
E: Pair style AIREBO requires atom IDs
|
||||
|
||||
This is a requirement to use the AIREBO potential.
|
||||
|
||||
E: Pair style AIREBO requires newton pair on
|
||||
|
||||
See the newton command. This is a restriction to use the AIREBO
|
||||
potential.
|
||||
|
||||
E: All pair coeffs are not set
|
||||
|
||||
All pair coefficients must be set in the data file or by the
|
||||
pair_coeff command before running a simulation.
|
||||
|
||||
E: Neighbor list overflow, boost neigh_modify one
|
||||
|
||||
There are too many neighbors of a single atom. Use the neigh_modify
|
||||
command to increase the max number of neighbors allowed for one atom.
|
||||
You may also want to boost the page size.
|
||||
|
||||
E: Cannot open AIREBO potential file %s
|
||||
|
||||
The specified AIREBO potential file cannot be opened. Check that the
|
||||
path and name are correct.
|
||||
|
||||
*/
|
|
@ -0,0 +1,37 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Markus Hohnerbach (RWTH)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_airebo_morse_intel.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairAIREBOMorseIntel::PairAIREBOMorseIntel(LAMMPS *lmp)
|
||||
: PairAIREBOIntel(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
global settings
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairAIREBOMorseIntel::settings(int narg, char **arg)
|
||||
{
|
||||
PairAIREBOIntel::settings(narg,arg);
|
||||
|
||||
morseflag = 1;
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Markus Hohnerbach (RWTH)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(airebo/morse/intel,PairAIREBOMorseIntel)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_AIREBO_MORSE_INTEL_H
|
||||
#define LMP_PAIR_AIREBO_MORSE_INTEL_H
|
||||
|
||||
#include "pair_airebo_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairAIREBOMorseIntel : public PairAIREBOIntel {
|
||||
public:
|
||||
PairAIREBOMorseIntel(class LAMMPS *);
|
||||
virtual void settings(int, char **);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,326 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Stephen Foiles (SNL), Murray Daw (SNL)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "pair_eam_alloy_intel.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MAXLINE 1024
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairEAMAlloyIntel::PairEAMAlloyIntel(LAMMPS *lmp) : PairEAMIntel(lmp)
|
||||
{
|
||||
one_coeff = 1;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set coeffs for one or more type pairs
|
||||
read DYNAMO setfl file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMAlloyIntel::coeff(int narg, char **arg)
|
||||
{
|
||||
int i,j;
|
||||
|
||||
if (!allocated) allocate();
|
||||
|
||||
if (narg != 3 + atom->ntypes)
|
||||
error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
|
||||
// insure I,J args are * *
|
||||
|
||||
if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
|
||||
error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
|
||||
// read EAM setfl file
|
||||
|
||||
if (setfl) {
|
||||
for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i];
|
||||
delete [] setfl->elements;
|
||||
delete [] setfl->mass;
|
||||
memory->destroy(setfl->frho);
|
||||
memory->destroy(setfl->rhor);
|
||||
memory->destroy(setfl->z2r);
|
||||
delete setfl;
|
||||
}
|
||||
setfl = new Setfl();
|
||||
read_file(arg[2]);
|
||||
|
||||
// read args that map atom types to elements in potential file
|
||||
// map[i] = which element the Ith atom type is, -1 if NULL
|
||||
|
||||
for (i = 3; i < narg; i++) {
|
||||
if (strcmp(arg[i],"NULL") == 0) {
|
||||
map[i-2] = -1;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < setfl->nelements; j++)
|
||||
if (strcmp(arg[i],setfl->elements[j]) == 0) break;
|
||||
if (j < setfl->nelements) map[i-2] = j;
|
||||
else error->all(FLERR,"No matching element in EAM potential file");
|
||||
}
|
||||
|
||||
// clear setflag since coeff() called once with I,J = * *
|
||||
|
||||
int n = atom->ntypes;
|
||||
for (i = 1; i <= n; i++)
|
||||
for (j = i; j <= n; j++)
|
||||
setflag[i][j] = 0;
|
||||
|
||||
// set setflag i,j for type pairs where both are mapped to elements
|
||||
// set mass of atom type if i = j
|
||||
|
||||
int count = 0;
|
||||
for (i = 1; i <= n; i++) {
|
||||
for (j = i; j <= n; j++) {
|
||||
if (map[i] >= 0 && map[j] >= 0) {
|
||||
setflag[i][j] = 1;
|
||||
if (i == j) atom->set_mass(FLERR,i,setfl->mass[map[i]]);
|
||||
count++;
|
||||
}
|
||||
scale[i][j] = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
read a multi-element DYNAMO setfl file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMAlloyIntel::read_file(char *filename)
|
||||
{
|
||||
Setfl *file = setfl;
|
||||
|
||||
// open potential file
|
||||
|
||||
int me = comm->me;
|
||||
FILE *fptr;
|
||||
char line[MAXLINE];
|
||||
|
||||
if (me == 0) {
|
||||
fptr = force->open_potential(filename);
|
||||
if (fptr == NULL) {
|
||||
char str[128];
|
||||
sprintf(str,"Cannot open EAM potential file %s",filename);
|
||||
error->one(FLERR,str);
|
||||
}
|
||||
}
|
||||
|
||||
// read and broadcast header
|
||||
// extract element names from nelements line
|
||||
|
||||
int n;
|
||||
if (me == 0) {
|
||||
fgets(line,MAXLINE,fptr);
|
||||
fgets(line,MAXLINE,fptr);
|
||||
fgets(line,MAXLINE,fptr);
|
||||
fgets(line,MAXLINE,fptr);
|
||||
n = strlen(line) + 1;
|
||||
}
|
||||
MPI_Bcast(&n,1,MPI_INT,0,world);
|
||||
MPI_Bcast(line,n,MPI_CHAR,0,world);
|
||||
|
||||
sscanf(line,"%d",&file->nelements);
|
||||
int nwords = atom->count_words(line);
|
||||
if (nwords != file->nelements + 1)
|
||||
error->all(FLERR,"Incorrect element names in EAM potential file");
|
||||
|
||||
char **words = new char*[file->nelements+1];
|
||||
nwords = 0;
|
||||
strtok(line," \t\n\r\f");
|
||||
while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
|
||||
|
||||
file->elements = new char*[file->nelements];
|
||||
for (int i = 0; i < file->nelements; i++) {
|
||||
n = strlen(words[i]) + 1;
|
||||
file->elements[i] = new char[n];
|
||||
strcpy(file->elements[i],words[i]);
|
||||
}
|
||||
delete [] words;
|
||||
|
||||
if (me == 0) {
|
||||
fgets(line,MAXLINE,fptr);
|
||||
sscanf(line,"%d %lg %d %lg %lg",
|
||||
&file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
|
||||
}
|
||||
|
||||
MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&file->nr,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
|
||||
|
||||
file->mass = new double[file->nelements];
|
||||
memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho");
|
||||
memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor");
|
||||
memory->create(file->z2r,file->nelements,file->nelements,file->nr+1,
|
||||
"pair:z2r");
|
||||
|
||||
int i,j,tmp;
|
||||
for (i = 0; i < file->nelements; i++) {
|
||||
if (me == 0) {
|
||||
fgets(line,MAXLINE,fptr);
|
||||
sscanf(line,"%d %lg",&tmp,&file->mass[i]);
|
||||
}
|
||||
MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
|
||||
|
||||
if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
|
||||
MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
|
||||
if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]);
|
||||
MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world);
|
||||
}
|
||||
|
||||
for (i = 0; i < file->nelements; i++)
|
||||
for (j = 0; j <= i; j++) {
|
||||
if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
|
||||
MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
|
||||
}
|
||||
|
||||
// close the potential file
|
||||
|
||||
if (me == 0) fclose(fptr);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy read-in setfl potential to standard array format
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMAlloyIntel::file2array()
|
||||
{
|
||||
int i,j,m,n;
|
||||
int ntypes = atom->ntypes;
|
||||
|
||||
// set function params directly from setfl file
|
||||
|
||||
nrho = setfl->nrho;
|
||||
nr = setfl->nr;
|
||||
drho = setfl->drho;
|
||||
dr = setfl->dr;
|
||||
rhomax = (nrho-1) * drho;
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup frho arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate frho arrays
|
||||
// nfrho = # of setfl elements + 1 for zero array
|
||||
|
||||
nfrho = setfl->nelements + 1;
|
||||
memory->destroy(frho);
|
||||
memory->create(frho,nfrho,nrho+1,"pair:frho");
|
||||
|
||||
// copy each element's frho to global frho
|
||||
|
||||
for (i = 0; i < setfl->nelements; i++)
|
||||
for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m];
|
||||
|
||||
// add extra frho of zeroes for non-EAM types to point to (pair hybrid)
|
||||
// this is necessary b/c fp is still computed for non-EAM atoms
|
||||
|
||||
for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
|
||||
|
||||
// type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
|
||||
// if atom type doesn't point to element (non-EAM atom in pair hybrid)
|
||||
// then map it to last frho array of zeroes
|
||||
|
||||
for (i = 1; i <= ntypes; i++)
|
||||
if (map[i] >= 0) type2frho[i] = map[i];
|
||||
else type2frho[i] = nfrho-1;
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup rhor arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate rhor arrays
|
||||
// nrhor = # of setfl elements
|
||||
|
||||
nrhor = setfl->nelements;
|
||||
memory->destroy(rhor);
|
||||
memory->create(rhor,nrhor,nr+1,"pair:rhor");
|
||||
|
||||
// copy each element's rhor to global rhor
|
||||
|
||||
for (i = 0; i < setfl->nelements; i++)
|
||||
for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m];
|
||||
|
||||
// type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
|
||||
// for setfl files, I,J mapping only depends on I
|
||||
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
|
||||
|
||||
for (i = 1; i <= ntypes; i++)
|
||||
for (j = 1; j <= ntypes; j++)
|
||||
type2rhor[i][j] = map[i];
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup z2r arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate z2r arrays
|
||||
// nz2r = N*(N+1)/2 where N = # of setfl elements
|
||||
|
||||
nz2r = setfl->nelements * (setfl->nelements+1) / 2;
|
||||
memory->destroy(z2r);
|
||||
memory->create(z2r,nz2r,nr+1,"pair:z2r");
|
||||
|
||||
// copy each element pair z2r to global z2r, only for I >= J
|
||||
|
||||
n = 0;
|
||||
for (i = 0; i < setfl->nelements; i++)
|
||||
for (j = 0; j <= i; j++) {
|
||||
for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m];
|
||||
n++;
|
||||
}
|
||||
|
||||
// type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
|
||||
// set of z2r arrays only fill lower triangular Nelement matrix
|
||||
// value = n = sum over rows of lower-triangular matrix until reach irow,icol
|
||||
// swap indices when irow < icol to stay lower triangular
|
||||
// if map = -1 (non-EAM atom in pair hybrid):
|
||||
// type2z2r is not used by non-opt
|
||||
// but set type2z2r to 0 since accessed by opt
|
||||
|
||||
int irow,icol;
|
||||
for (i = 1; i <= ntypes; i++) {
|
||||
for (j = 1; j <= ntypes; j++) {
|
||||
irow = map[i];
|
||||
icol = map[j];
|
||||
if (irow == -1 || icol == -1) {
|
||||
type2z2r[i][j] = 0;
|
||||
continue;
|
||||
}
|
||||
if (irow < icol) {
|
||||
irow = map[j];
|
||||
icol = map[i];
|
||||
}
|
||||
n = 0;
|
||||
for (m = 0; m < irow; m++) n += m + 1;
|
||||
n += icol;
|
||||
type2z2r[i][j] = n;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(eam/alloy/intel,PairEAMAlloyIntel)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_EAM_ALLOY_INTEL_H
|
||||
#define LMP_PAIR_EAM_ALLOY_INTEL_H
|
||||
|
||||
#include "pair_eam_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// need virtual public b/c of how eam/alloy/opt inherits from it
|
||||
|
||||
class PairEAMAlloyIntel : virtual public PairEAMIntel {
|
||||
public:
|
||||
PairEAMAlloyIntel(class LAMMPS *);
|
||||
virtual ~PairEAMAlloyIntel() {}
|
||||
void coeff(int, char **);
|
||||
|
||||
protected:
|
||||
void read_file(char *);
|
||||
void file2array();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,335 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Tim Lau (MIT)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "pair_eam_fs_intel.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define MAXLINE 1024
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairEAMFSIntel::PairEAMFSIntel(LAMMPS *lmp) : PairEAMIntel(lmp)
|
||||
{
|
||||
one_coeff = 1;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
set coeffs for one or more type pairs
|
||||
read EAM Finnis-Sinclair file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMFSIntel::coeff(int narg, char **arg)
|
||||
{
|
||||
int i,j;
|
||||
|
||||
if (!allocated) allocate();
|
||||
|
||||
if (narg != 3 + atom->ntypes)
|
||||
error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
|
||||
// insure I,J args are * *
|
||||
|
||||
if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
|
||||
error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
|
||||
// read EAM Finnis-Sinclair file
|
||||
|
||||
if (fs) {
|
||||
for (i = 0; i < fs->nelements; i++) delete [] fs->elements[i];
|
||||
delete [] fs->elements;
|
||||
delete [] fs->mass;
|
||||
memory->destroy(fs->frho);
|
||||
memory->destroy(fs->rhor);
|
||||
memory->destroy(fs->z2r);
|
||||
delete fs;
|
||||
}
|
||||
fs = new Fs();
|
||||
read_file(arg[2]);
|
||||
|
||||
// read args that map atom types to elements in potential file
|
||||
// map[i] = which element the Ith atom type is, -1 if NULL
|
||||
|
||||
for (i = 3; i < narg; i++) {
|
||||
if (strcmp(arg[i],"NULL") == 0) {
|
||||
map[i-2] = -1;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < fs->nelements; j++)
|
||||
if (strcmp(arg[i],fs->elements[j]) == 0) break;
|
||||
if (j < fs->nelements) map[i-2] = j;
|
||||
else error->all(FLERR,"No matching element in EAM potential file");
|
||||
}
|
||||
|
||||
// clear setflag since coeff() called once with I,J = * *
|
||||
|
||||
int n = atom->ntypes;
|
||||
for (i = 1; i <= n; i++)
|
||||
for (j = i; j <= n; j++)
|
||||
setflag[i][j] = 0;
|
||||
|
||||
// set setflag i,j for type pairs where both are mapped to elements
|
||||
// set mass of atom type if i = j
|
||||
|
||||
int count = 0;
|
||||
for (i = 1; i <= n; i++) {
|
||||
for (j = i; j <= n; j++) {
|
||||
if (map[i] >= 0 && map[j] >= 0) {
|
||||
setflag[i][j] = 1;
|
||||
if (i == j) atom->set_mass(FLERR,i,fs->mass[map[i]]);
|
||||
count++;
|
||||
}
|
||||
scale[i][j] = 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
read a multi-element DYNAMO setfl file
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMFSIntel::read_file(char *filename)
|
||||
{
|
||||
Fs *file = fs;
|
||||
|
||||
// open potential file
|
||||
|
||||
int me = comm->me;
|
||||
FILE *fptr;
|
||||
char line[MAXLINE];
|
||||
|
||||
if (me == 0) {
|
||||
fptr = force->open_potential(filename);
|
||||
if (fptr == NULL) {
|
||||
char str[128];
|
||||
sprintf(str,"Cannot open EAM potential file %s",filename);
|
||||
error->one(FLERR,str);
|
||||
}
|
||||
}
|
||||
|
||||
// read and broadcast header
|
||||
// extract element names from nelements line
|
||||
|
||||
int n;
|
||||
if (me == 0) {
|
||||
fgets(line,MAXLINE,fptr);
|
||||
fgets(line,MAXLINE,fptr);
|
||||
fgets(line,MAXLINE,fptr);
|
||||
fgets(line,MAXLINE,fptr);
|
||||
n = strlen(line) + 1;
|
||||
}
|
||||
MPI_Bcast(&n,1,MPI_INT,0,world);
|
||||
MPI_Bcast(line,n,MPI_CHAR,0,world);
|
||||
|
||||
sscanf(line,"%d",&file->nelements);
|
||||
int nwords = atom->count_words(line);
|
||||
if (nwords != file->nelements + 1)
|
||||
error->all(FLERR,"Incorrect element names in EAM potential file");
|
||||
|
||||
char **words = new char*[file->nelements+1];
|
||||
nwords = 0;
|
||||
strtok(line," \t\n\r\f");
|
||||
while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
|
||||
|
||||
file->elements = new char*[file->nelements];
|
||||
for (int i = 0; i < file->nelements; i++) {
|
||||
n = strlen(words[i]) + 1;
|
||||
file->elements[i] = new char[n];
|
||||
strcpy(file->elements[i],words[i]);
|
||||
}
|
||||
delete [] words;
|
||||
|
||||
if (me == 0) {
|
||||
fgets(line,MAXLINE,fptr);
|
||||
sscanf(line,"%d %lg %d %lg %lg",
|
||||
&file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
|
||||
}
|
||||
|
||||
MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&file->nr,1,MPI_INT,0,world);
|
||||
MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
|
||||
MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
|
||||
|
||||
file->mass = new double[file->nelements];
|
||||
memory->create(file->frho,file->nelements,file->nrho+1,
|
||||
"pair:frho");
|
||||
memory->create(file->rhor,file->nelements,file->nelements,
|
||||
file->nr+1,"pair:rhor");
|
||||
memory->create(file->z2r,file->nelements,file->nelements,
|
||||
file->nr+1,"pair:z2r");
|
||||
|
||||
int i,j,tmp;
|
||||
for (i = 0; i < file->nelements; i++) {
|
||||
if (me == 0) {
|
||||
fgets(line,MAXLINE,fptr);
|
||||
sscanf(line,"%d %lg",&tmp,&file->mass[i]);
|
||||
}
|
||||
MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
|
||||
|
||||
if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
|
||||
MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
|
||||
|
||||
for (j = 0; j < file->nelements; j++) {
|
||||
if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]);
|
||||
MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < file->nelements; i++)
|
||||
for (j = 0; j <= i; j++) {
|
||||
if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
|
||||
MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
|
||||
}
|
||||
|
||||
// close the potential file
|
||||
|
||||
if (me == 0) fclose(fptr);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy read-in setfl potential to standard array format
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairEAMFSIntel::file2array()
|
||||
{
|
||||
int i,j,m,n;
|
||||
int ntypes = atom->ntypes;
|
||||
|
||||
// set function params directly from fs file
|
||||
|
||||
nrho = fs->nrho;
|
||||
nr = fs->nr;
|
||||
drho = fs->drho;
|
||||
dr = fs->dr;
|
||||
rhomax = (nrho-1) * drho;
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup frho arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate frho arrays
|
||||
// nfrho = # of fs elements + 1 for zero array
|
||||
|
||||
nfrho = fs->nelements + 1;
|
||||
memory->destroy(frho);
|
||||
memory->create(frho,nfrho,nrho+1,"pair:frho");
|
||||
|
||||
// copy each element's frho to global frho
|
||||
|
||||
for (i = 0; i < fs->nelements; i++)
|
||||
for (m = 1; m <= nrho; m++) frho[i][m] = fs->frho[i][m];
|
||||
|
||||
// add extra frho of zeroes for non-EAM types to point to (pair hybrid)
|
||||
// this is necessary b/c fp is still computed for non-EAM atoms
|
||||
|
||||
for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
|
||||
|
||||
// type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
|
||||
// if atom type doesn't point to element (non-EAM atom in pair hybrid)
|
||||
// then map it to last frho array of zeroes
|
||||
|
||||
for (i = 1; i <= ntypes; i++)
|
||||
if (map[i] >= 0) type2frho[i] = map[i];
|
||||
else type2frho[i] = nfrho-1;
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup rhor arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate rhor arrays
|
||||
// nrhor = square of # of fs elements
|
||||
|
||||
nrhor = fs->nelements * fs->nelements;
|
||||
memory->destroy(rhor);
|
||||
memory->create(rhor,nrhor,nr+1,"pair:rhor");
|
||||
|
||||
// copy each element pair rhor to global rhor
|
||||
|
||||
n = 0;
|
||||
for (i = 0; i < fs->nelements; i++)
|
||||
for (j = 0; j < fs->nelements; j++) {
|
||||
for (m = 1; m <= nr; m++) rhor[n][m] = fs->rhor[i][j][m];
|
||||
n++;
|
||||
}
|
||||
|
||||
// type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
|
||||
// for fs files, there is a full NxN set of rhor arrays
|
||||
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
|
||||
|
||||
for (i = 1; i <= ntypes; i++)
|
||||
for (j = 1; j <= ntypes; j++)
|
||||
type2rhor[i][j] = map[i] * fs->nelements + map[j];
|
||||
|
||||
// ------------------------------------------------------------------
|
||||
// setup z2r arrays
|
||||
// ------------------------------------------------------------------
|
||||
|
||||
// allocate z2r arrays
|
||||
// nz2r = N*(N+1)/2 where N = # of fs elements
|
||||
|
||||
nz2r = fs->nelements * (fs->nelements+1) / 2;
|
||||
memory->destroy(z2r);
|
||||
memory->create(z2r,nz2r,nr+1,"pair:z2r");
|
||||
|
||||
// copy each element pair z2r to global z2r, only for I >= J
|
||||
|
||||
n = 0;
|
||||
for (i = 0; i < fs->nelements; i++)
|
||||
for (j = 0; j <= i; j++) {
|
||||
for (m = 1; m <= nr; m++) z2r[n][m] = fs->z2r[i][j][m];
|
||||
n++;
|
||||
}
|
||||
|
||||
// type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
|
||||
// set of z2r arrays only fill lower triangular Nelement matrix
|
||||
// value = n = sum over rows of lower-triangular matrix until reach irow,icol
|
||||
// swap indices when irow < icol to stay lower triangular
|
||||
// if map = -1 (non-EAM atom in pair hybrid):
|
||||
// type2z2r is not used by non-opt
|
||||
// but set type2z2r to 0 since accessed by opt
|
||||
|
||||
int irow,icol;
|
||||
for (i = 1; i <= ntypes; i++) {
|
||||
for (j = 1; j <= ntypes; j++) {
|
||||
irow = map[i];
|
||||
icol = map[j];
|
||||
if (irow == -1 || icol == -1) {
|
||||
type2z2r[i][j] = 0;
|
||||
continue;
|
||||
}
|
||||
if (irow < icol) {
|
||||
irow = map[j];
|
||||
icol = map[i];
|
||||
}
|
||||
n = 0;
|
||||
for (m = 0; m < irow; m++) n += m + 1;
|
||||
n += icol;
|
||||
type2z2r[i][j] = n;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,43 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(eam/fs/intel,PairEAMFSIntel)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_EAM_FS_INTEL_H
|
||||
#define LMP_PAIR_EAM_FS_INTEL_H
|
||||
|
||||
#include "pair_eam_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
// need virtual public b/c of how eam/fs/opt inherits from it
|
||||
|
||||
class PairEAMFSIntel : virtual public PairEAMIntel {
|
||||
public:
|
||||
PairEAMFSIntel(class LAMMPS *);
|
||||
virtual ~PairEAMFSIntel() {}
|
||||
void coeff(int, char **);
|
||||
|
||||
protected:
|
||||
void read_file(char *);
|
||||
void file2array();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -428,7 +428,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
|
|||
} else
|
||||
multiple_forms = true;
|
||||
}
|
||||
const int edge = (packed_j % pad_width);
|
||||
const int edge = packed_j & (pad_width - 1);
|
||||
if (edge) {
|
||||
const int packed_end = packed_j + (pad_width - edge);
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
|
|
|
@ -0,0 +1,595 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
This software is distributed under the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <math.h>
|
||||
#include "pair_lj_charmm_coul_charmm_intel.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "group.h"
|
||||
#include "memory.h"
|
||||
#include "modify.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "memory.h"
|
||||
#include "suffix.h"
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define LJ_T typename IntelBuffers<flt_t,flt_t>::vec4_t
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairLJCharmmCoulCharmmIntel::PairLJCharmmCoulCharmmIntel(LAMMPS *lmp) :
|
||||
PairLJCharmmCoulCharmm(lmp)
|
||||
{
|
||||
suffix_flag |= Suffix::INTEL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairLJCharmmCoulCharmmIntel::~PairLJCharmmCoulCharmmIntel()
|
||||
{
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulCharmmIntel::compute(int eflag, int vflag)
|
||||
{
|
||||
if (fix->precision()==FixIntel::PREC_MODE_MIXED)
|
||||
compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
|
||||
force_const_single);
|
||||
else if (fix->precision()==FixIntel::PREC_MODE_DOUBLE)
|
||||
compute<double,double>(eflag, vflag, fix->get_double_buffers(),
|
||||
force_const_double);
|
||||
else
|
||||
compute<float,float>(eflag, vflag, fix->get_single_buffers(),
|
||||
force_const_single);
|
||||
|
||||
fix->balance_stamp();
|
||||
vflag_fdotr = 0;
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void PairLJCharmmCoulCharmmIntel::compute(int eflag, int vflag,
|
||||
IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc)
|
||||
{
|
||||
if (eflag || vflag) {
|
||||
ev_setup(eflag,vflag);
|
||||
} else evflag = vflag_fdotr = 0;
|
||||
|
||||
const int inum = list->inum;
|
||||
const int nthreads = comm->nthreads;
|
||||
const int host_start = fix->host_start_pair();
|
||||
const int offload_end = fix->offload_end_pair();
|
||||
const int ago = neighbor->ago;
|
||||
|
||||
if (ago != 0 && fix->separate_buffers() == 0) {
|
||||
fix->start_watch(TIME_PACK);
|
||||
|
||||
int packthreads;
|
||||
if (nthreads > INTEL_HTHREADS) packthreads = nthreads;
|
||||
else packthreads = 1;
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel if(packthreads > 1)
|
||||
#endif
|
||||
{
|
||||
int ifrom, ito, tid;
|
||||
IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal+atom->nghost,
|
||||
packthreads, sizeof(ATOM_T));
|
||||
buffers->thr_pack(ifrom,ito,ago);
|
||||
}
|
||||
fix->stop_watch(TIME_PACK);
|
||||
}
|
||||
|
||||
// -------------------- Regular version
|
||||
int ovflag = 0;
|
||||
if (vflag_fdotr) ovflag = 2;
|
||||
else if (vflag) ovflag = 1;
|
||||
if (eflag) {
|
||||
if (force->newton_pair) {
|
||||
eval<1,1>(1, ovflag, buffers, fc, 0, offload_end);
|
||||
eval<1,1>(0, ovflag, buffers, fc, host_start, inum);
|
||||
} else {
|
||||
eval<1,0>(1, ovflag, buffers, fc, 0, offload_end);
|
||||
eval<1,0>(0, ovflag, buffers, fc, host_start, inum);
|
||||
}
|
||||
} else {
|
||||
if (force->newton_pair) {
|
||||
eval<0,1>(1, ovflag, buffers, fc, 0, offload_end);
|
||||
eval<0,1>(0, ovflag, buffers, fc, host_start, inum);
|
||||
} else {
|
||||
eval<0,0>(1, ovflag, buffers, fc, 0, offload_end);
|
||||
eval<0,0>(0, ovflag, buffers, fc, host_start, inum);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
|
||||
void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag,
|
||||
IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc,
|
||||
const int astart, const int aend)
|
||||
{
|
||||
const int inum = aend - astart;
|
||||
if (inum == 0) return;
|
||||
int nlocal, nall, minlocal;
|
||||
fix->get_buffern(offload, nlocal, nall, minlocal);
|
||||
|
||||
const int ago = neighbor->ago;
|
||||
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
|
||||
|
||||
ATOM_T * _noalias const x = buffers->get_x(offload);
|
||||
flt_t * _noalias const q = buffers->get_q(offload);
|
||||
|
||||
const int * _noalias const numneigh = list->numneigh;
|
||||
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
|
||||
const int * _noalias const firstneigh = buffers->firstneigh(list);
|
||||
|
||||
const flt_t * _noalias const special_coul = fc.special_coul;
|
||||
const flt_t * _noalias const special_lj = fc.special_lj;
|
||||
const flt_t qqrd2e = force->qqrd2e;
|
||||
const flt_t inv_denom_lj = (flt_t)1.0/denom_lj;
|
||||
const flt_t inv_denom_coul = (flt_t)1.0/denom_coul;
|
||||
|
||||
const flt_t * _noalias const cutsq = fc.cutsq[0];
|
||||
const LJ_T * _noalias const lj = fc.lj[0];
|
||||
const flt_t cut_ljsq = fc.cut_ljsq;
|
||||
const flt_t cut_lj_innersq = fc.cut_lj_innersq;
|
||||
const flt_t cut_coul_innersq = fc.cut_coul_innersq;
|
||||
const flt_t cut_coulsq = fc.cut_coulsq;
|
||||
|
||||
const int ntypes = atom->ntypes + 1;
|
||||
const int eatom = this->eflag_atom;
|
||||
|
||||
flt_t * _noalias const ccachex = buffers->get_ccachex();
|
||||
flt_t * _noalias const ccachey = buffers->get_ccachey();
|
||||
flt_t * _noalias const ccachez = buffers->get_ccachez();
|
||||
flt_t * _noalias const ccachew = buffers->get_ccachew();
|
||||
int * _noalias const ccachei = buffers->get_ccachei();
|
||||
int * _noalias const ccachej = buffers->get_ccachej();
|
||||
const int ccache_stride = _ccache_stride;
|
||||
|
||||
// Determine how much data to transfer
|
||||
int x_size, q_size, f_stride, ev_size, separate_flag;
|
||||
IP_PRE_get_transfern(ago, NEWTON_PAIR, EFLAG, vflag,
|
||||
buffers, offload, fix, separate_flag,
|
||||
x_size, q_size, ev_size, f_stride);
|
||||
|
||||
int tc;
|
||||
FORCE_T * _noalias f_start;
|
||||
acc_t * _noalias ev_global;
|
||||
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
|
||||
|
||||
const int nthreads = tc;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int *overflow = fix->get_off_overflow_flag();
|
||||
double *timer_compute = fix->off_watch_pair();
|
||||
|
||||
if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY);
|
||||
#pragma offload target(mic:_cop) if(offload) \
|
||||
in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cutsq,lj:length(0) alloc_if(0) free_if(0)) \
|
||||
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(numneigh:length(0) alloc_if(0) free_if(0)) \
|
||||
in(x:length(x_size) alloc_if(0) free_if(0)) \
|
||||
in(q:length(q_size) alloc_if(0) free_if(0)) \
|
||||
in(overflow:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ccachex,ccachey,ccachez,ccachew:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ccachei,ccachej:length(0) alloc_if(0) free_if(0)) \
|
||||
in(ccache_stride,nthreads,qqrd2e,inum,nall,ntypes,cut_coulsq) \
|
||||
in(vflag,eatom,f_stride,separate_flag,offload) \
|
||||
in(astart,cut_ljsq,cut_lj_innersq,nlocal,inv_denom_lj,minlocal) \
|
||||
in(inv_denom_coul,cut_coul_innersq) \
|
||||
out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
|
||||
out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
|
||||
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
|
||||
signal(f_start)
|
||||
#endif
|
||||
{
|
||||
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
|
||||
*timer_compute = MIC_Wtime();
|
||||
#endif
|
||||
|
||||
IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
|
||||
f_stride, x, q);
|
||||
|
||||
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EFLAG) oevdwl = oecoul = (acc_t)0;
|
||||
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5)
|
||||
#endif
|
||||
{
|
||||
int iifrom, iip, iito, tid;
|
||||
IP_PRE_omp_stride_id(iifrom, iip, iito, tid, inum, nthreads);
|
||||
iifrom += astart;
|
||||
iito += astart;
|
||||
|
||||
int foff;
|
||||
if (NEWTON_PAIR) foff = tid * f_stride - minlocal;
|
||||
else foff = -minlocal;
|
||||
FORCE_T * _noalias const f = f_start + foff;
|
||||
if (NEWTON_PAIR) memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
|
||||
flt_t cutboth = cut_coulsq;
|
||||
|
||||
const int toffs = tid * ccache_stride;
|
||||
flt_t * _noalias const tdelx = ccachex + toffs;
|
||||
flt_t * _noalias const tdely = ccachey + toffs;
|
||||
flt_t * _noalias const tdelz = ccachez + toffs;
|
||||
flt_t * _noalias const trsq = ccachew + toffs;
|
||||
int * _noalias const tj = ccachei + toffs;
|
||||
int * _noalias const tjtype = ccachej + toffs;
|
||||
|
||||
for (int i = iifrom; i < iito; i += iip) {
|
||||
// const int i = ilist[ii];
|
||||
const int itype = x[i].w;
|
||||
|
||||
const int ptr_off = itype * ntypes;
|
||||
const flt_t * _noalias const cutsqi = cutsq + ptr_off;
|
||||
const LJ_T * _noalias const lji = lj + ptr_off;
|
||||
|
||||
const int * _noalias const jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
|
||||
acc_t fxtmp,fytmp,fztmp,fwtmp;
|
||||
acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5;
|
||||
|
||||
const flt_t xtmp = x[i].x;
|
||||
const flt_t ytmp = x[i].y;
|
||||
const flt_t ztmp = x[i].z;
|
||||
const flt_t qtmp = q[i];
|
||||
fxtmp = fytmp = fztmp = (acc_t)0;
|
||||
if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0;
|
||||
if (NEWTON_PAIR == 0)
|
||||
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
|
||||
|
||||
int ej = 0;
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma ivdep
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int j = jlist[jj] & NEIGHMASK;
|
||||
const flt_t delx = xtmp - x[j].x;
|
||||
const flt_t dely = ytmp - x[j].y;
|
||||
const flt_t delz = ztmp - x[j].z;
|
||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if (rsq < cut_coulsq) {
|
||||
trsq[ej]=rsq;
|
||||
tdelx[ej]=delx;
|
||||
tdely[ej]=dely;
|
||||
tdelz[ej]=delz;
|
||||
tjtype[ej]=x[j].w;
|
||||
tj[ej]=jlist[jj];
|
||||
ej++;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
|
||||
sv0, sv1, sv2, sv3, sv4, sv5)
|
||||
#endif
|
||||
for (int jj = 0; jj < ej; jj++) {
|
||||
flt_t forcecoul, forcelj, evdwl;
|
||||
forcecoul = forcelj = evdwl = (flt_t)0.0;
|
||||
|
||||
const int j = tj[jj] & NEIGHMASK;
|
||||
const int sbindex = tj[jj] >> SBBITS & 3;
|
||||
const flt_t rsq = trsq[jj];
|
||||
const flt_t r2inv = (flt_t)1.0 / rsq;
|
||||
const flt_t r_inv = (flt_t)1.0 / sqrt(rsq);
|
||||
forcecoul = qqrd2e * qtmp * q[j] * r_inv;
|
||||
if (rsq > cut_coul_innersq) {
|
||||
const flt_t ccr = cut_coulsq - rsq;
|
||||
const flt_t switch1 = ccr * ccr * inv_denom_coul *
|
||||
(cut_coulsq + (flt_t)2.0 * rsq - (flt_t)3.0 * cut_coul_innersq);
|
||||
forcecoul *= switch1;
|
||||
}
|
||||
|
||||
#ifdef INTEL_VMASK
|
||||
if (rsq < cut_ljsq) {
|
||||
#endif
|
||||
const int jtype = tjtype[jj];
|
||||
flt_t r6inv = r2inv * r2inv * r2inv;
|
||||
forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y);
|
||||
if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w);
|
||||
|
||||
#ifdef INTEL_VMASK
|
||||
if (rsq > cut_lj_innersq) {
|
||||
#endif
|
||||
const flt_t drsq = cut_ljsq - rsq;
|
||||
const flt_t cut2 = (rsq - cut_lj_innersq) * drsq;
|
||||
const flt_t switch1 = drsq * (drsq * drsq + (flt_t)3.0 * cut2) *
|
||||
inv_denom_lj;
|
||||
const flt_t switch2 = (flt_t)12.0 * rsq * cut2 * inv_denom_lj;
|
||||
if (EFLAG) {
|
||||
#ifndef INTEL_VMASK
|
||||
if (rsq > cut_lj_innersq) {
|
||||
#endif
|
||||
forcelj = forcelj * switch1 + evdwl * switch2;
|
||||
evdwl *= switch1;
|
||||
#ifndef INTEL_VMASK
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
const flt_t philj = r6inv * (lji[jtype].z*r6inv -
|
||||
lji[jtype].w);
|
||||
#ifndef INTEL_VMASK
|
||||
if (rsq > cut_lj_innersq)
|
||||
#endif
|
||||
forcelj = forcelj * switch1 + philj * switch2;
|
||||
}
|
||||
#ifdef INTEL_VMASK
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef INTEL_VMASK
|
||||
}
|
||||
#else
|
||||
if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; }
|
||||
#endif
|
||||
if (sbindex) {
|
||||
const flt_t factor_coul = special_coul[sbindex];
|
||||
forcecoul *= factor_coul;
|
||||
const flt_t factor_lj = special_lj[sbindex];
|
||||
forcelj *= factor_lj;
|
||||
if (EFLAG) evdwl *= factor_lj;
|
||||
}
|
||||
|
||||
const flt_t fpair = (forcecoul + forcelj) * r2inv;
|
||||
const flt_t fpx = fpair * tdelx[jj];
|
||||
fxtmp += fpx;
|
||||
if (NEWTON_PAIR) f[j].x -= fpx;
|
||||
const flt_t fpy = fpair * tdely[jj];
|
||||
fytmp += fpy;
|
||||
if (NEWTON_PAIR) f[j].y -= fpy;
|
||||
const flt_t fpz = fpair * tdelz[jj];
|
||||
fztmp += fpz;
|
||||
if (NEWTON_PAIR) f[j].z -= fpz;
|
||||
|
||||
if (EFLAG) {
|
||||
sevdwl += evdwl;
|
||||
secoul += forcecoul;
|
||||
if (eatom) {
|
||||
fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * forcecoul;
|
||||
if (NEWTON_PAIR)
|
||||
f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * forcecoul;
|
||||
}
|
||||
}
|
||||
if (NEWTON_PAIR == 0)
|
||||
IP_PRE_ev_tally_nborv(vflag, tdelx[jj], tdely[jj], tdelz[jj],
|
||||
fpx, fpy, fpz);
|
||||
} // for jj
|
||||
if (NEWTON_PAIR) {
|
||||
f[i].x += fxtmp;
|
||||
f[i].y += fytmp;
|
||||
f[i].z += fztmp;
|
||||
} else {
|
||||
f[i].x = fxtmp;
|
||||
f[i].y = fytmp;
|
||||
f[i].z = fztmp;
|
||||
}
|
||||
IP_PRE_ev_tally_atomq(NEWTON_PAIR, EFLAG, vflag, f, fwtmp);
|
||||
} // for ii
|
||||
|
||||
IP_PRE_fdotr_reduce_omp(NEWTON_PAIR, nall, minlocal, nthreads, f_start,
|
||||
f_stride, x, offload, vflag, ov0, ov1, ov2, ov3,
|
||||
ov4, ov5);
|
||||
} // end of omp parallel region
|
||||
|
||||
IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
|
||||
if (EFLAG) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
oevdwl *= (acc_t)0.5;
|
||||
oecoul *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[0] = oevdwl;
|
||||
ev_global[1] = oecoul;
|
||||
}
|
||||
if (vflag) {
|
||||
if (NEWTON_PAIR == 0) {
|
||||
ov0 *= (acc_t)0.5;
|
||||
ov1 *= (acc_t)0.5;
|
||||
ov2 *= (acc_t)0.5;
|
||||
ov3 *= (acc_t)0.5;
|
||||
ov4 *= (acc_t)0.5;
|
||||
ov5 *= (acc_t)0.5;
|
||||
}
|
||||
ev_global[2] = ov0;
|
||||
ev_global[3] = ov1;
|
||||
ev_global[4] = ov2;
|
||||
ev_global[5] = ov3;
|
||||
ev_global[6] = ov4;
|
||||
ev_global[7] = ov5;
|
||||
}
|
||||
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
|
||||
*timer_compute = MIC_Wtime() - *timer_compute;
|
||||
#endif
|
||||
} // end of offload region
|
||||
|
||||
if (offload)
|
||||
fix->stop_watch(TIME_OFFLOAD_LATENCY);
|
||||
else
|
||||
fix->stop_watch(TIME_HOST_PAIR);
|
||||
|
||||
if (EFLAG || vflag)
|
||||
fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag);
|
||||
else
|
||||
fix->add_result_array(f_start, 0, offload);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJCharmmCoulCharmmIntel::init_style()
|
||||
{
|
||||
PairLJCharmmCoulCharmm::init_style();
|
||||
if (force->newton_pair == 0) {
|
||||
neighbor->requests[neighbor->nrequest-1]->half = 0;
|
||||
neighbor->requests[neighbor->nrequest-1]->full = 1;
|
||||
}
|
||||
neighbor->requests[neighbor->nrequest-1]->intel = 1;
|
||||
|
||||
int ifix = modify->find_fix("package_intel");
|
||||
if (ifix < 0)
|
||||
error->all(FLERR,
|
||||
"The 'package intel' command is required for /intel styles");
|
||||
fix = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||
|
||||
fix->pair_init_check();
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
_cop = fix->coprocessor_number();
|
||||
#endif
|
||||
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
pack_force_const(force_const_single, fix->get_mixed_buffers());
|
||||
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
pack_force_const(force_const_double, fix->get_double_buffers());
|
||||
else
|
||||
pack_force_const(force_const_single, fix->get_single_buffers());
|
||||
}
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void PairLJCharmmCoulCharmmIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
int off_ccache = 0;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_cop >= 0) off_ccache = 1;
|
||||
#endif
|
||||
buffers->grow_ccache(off_ccache, comm->nthreads, 1);
|
||||
_ccache_stride = buffers->ccache_stride();
|
||||
|
||||
int tp1 = atom->ntypes + 1;
|
||||
|
||||
fc.set_ntypes(tp1, memory, _cop);
|
||||
buffers->set_ntypes(tp1);
|
||||
flt_t **cutneighsq = buffers->get_cutneighsq();
|
||||
|
||||
// Repeat cutsq calculation because done after call to init_style
|
||||
double cut, cutneigh;
|
||||
if (cut_lj > cut_coul)
|
||||
error->all(FLERR,
|
||||
"Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic");
|
||||
for (int i = 1; i <= atom->ntypes; i++) {
|
||||
for (int j = i; j <= atom->ntypes; j++) {
|
||||
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
|
||||
cut = init_one(i, j);
|
||||
cutneigh = cut + neighbor->skin;
|
||||
cutsq[i][j] = cutsq[j][i] = cut*cut;
|
||||
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cut_coul_innersq = cut_coul_inner * cut_coul_inner;
|
||||
cut_lj_innersq = cut_lj_inner * cut_lj_inner;
|
||||
cut_ljsq = cut_lj * cut_lj;
|
||||
cut_coulsq = cut_coul * cut_coul;
|
||||
cut_bothsq = MAX(cut_ljsq, cut_coulsq);
|
||||
|
||||
fc.cut_coulsq = cut_coulsq;
|
||||
fc.cut_ljsq = cut_ljsq;
|
||||
fc.cut_coul_innersq = cut_coul_innersq;
|
||||
fc.cut_lj_innersq = cut_lj_innersq;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
fc.special_lj[i] = force->special_lj[i];
|
||||
fc.special_coul[i] = force->special_coul[i];
|
||||
fc.special_coul[0] = 1.0;
|
||||
fc.special_lj[0] = 1.0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < tp1; i++) {
|
||||
for (int j = 0; j < tp1; j++) {
|
||||
fc.lj[i][j].x = lj1[i][j];
|
||||
fc.lj[i][j].y = lj2[i][j];
|
||||
fc.lj[i][j].z = lj3[i][j];
|
||||
fc.lj[i][j].w = lj4[i][j];
|
||||
fc.cutsq[i][j] = cutsq[i][j];
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_cop < 0) return;
|
||||
flt_t * special_lj = fc.special_lj;
|
||||
flt_t * special_coul = fc.special_coul;
|
||||
flt_t * cutsq = fc.cutsq[0];
|
||||
LJ_T * lj = fc.lj[0];
|
||||
flt_t * ocutneighsq = cutneighsq[0];
|
||||
int tp1sq = tp1 * tp1;
|
||||
#pragma offload_transfer target(mic:_cop) \
|
||||
in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \
|
||||
in(cutsq,lj: length(tp1sq) alloc_if(0) free_if(0)) \
|
||||
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
|
||||
#endif
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t>
|
||||
void PairLJCharmmCoulCharmmIntel::ForceConst<flt_t>::set_ntypes(
|
||||
const int ntypes, Memory *memory, const int cop) {
|
||||
if (ntypes != _ntypes) {
|
||||
if (_ntypes > 0) {
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
flt_t * ospecial_lj = special_lj;
|
||||
flt_t * ospecial_coul = special_coul;
|
||||
flt_t * ocutsq = cutsq[0];
|
||||
typename IntelBuffers<flt_t,flt_t>::vec4_t * olj = lj[0];
|
||||
if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL &&
|
||||
ospecial_coul != NULL && cop >= 0) {
|
||||
#pragma offload_transfer target(mic:cop) \
|
||||
nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \
|
||||
nocopy(ocutsq, olj: alloc_if(0) free_if(1))
|
||||
}
|
||||
#endif
|
||||
|
||||
_memory->destroy(cutsq);
|
||||
_memory->destroy(lj);
|
||||
}
|
||||
if (ntypes > 0) {
|
||||
_cop = cop;
|
||||
memory->create(cutsq,ntypes,ntypes,"fc.cutsq");
|
||||
memory->create(lj,ntypes,ntypes,"fc.lj");
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
flt_t * ospecial_lj = special_lj;
|
||||
flt_t * ospecial_coul = special_coul;
|
||||
flt_t * ocutsq = cutsq[0];
|
||||
typename IntelBuffers<flt_t,flt_t>::vec4_t * olj = lj[0];
|
||||
int tp1sq = ntypes*ntypes;
|
||||
if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL &&
|
||||
ospecial_coul != NULL && cop >= 0) {
|
||||
#pragma offload_transfer target(mic:cop) \
|
||||
nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \
|
||||
nocopy(ospecial_coul: length(4) alloc_if(1) free_if(0)) \
|
||||
nocopy(ocutsq,olj: length(tp1sq) alloc_if(1) free_if(0))
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
_ntypes=ntypes;
|
||||
_memory=memory;
|
||||
}
|
|
@ -0,0 +1,100 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(lj/charmm/coul/charmm/intel,PairLJCharmmCoulCharmmIntel)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_INTEL_H
|
||||
#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_INTEL_H
|
||||
|
||||
#include "pair_lj_charmm_coul_charmm.h"
|
||||
#include "fix_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairLJCharmmCoulCharmmIntel : public PairLJCharmmCoulCharmm {
|
||||
|
||||
public:
|
||||
PairLJCharmmCoulCharmmIntel(class LAMMPS *);
|
||||
virtual ~PairLJCharmmCoulCharmmIntel();
|
||||
|
||||
virtual void compute(int, int);
|
||||
void init_style();
|
||||
|
||||
typedef struct { float x,y,z; int w; } sng4_t;
|
||||
|
||||
private:
|
||||
FixIntel *fix;
|
||||
int _cop, _ccache_stride;
|
||||
|
||||
template <class flt_t> class ForceConst;
|
||||
template <class flt_t, class acc_t>
|
||||
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc);
|
||||
template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
|
||||
void eval(const int offload, const int vflag,
|
||||
IntelBuffers<flt_t,acc_t> * buffers,
|
||||
const ForceConst<flt_t> &fc, const int astart, const int aend);
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t, acc_t> *buffers);
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
template <class flt_t>
|
||||
class ForceConst {
|
||||
public:
|
||||
_alignvar(flt_t special_coul[4],64);
|
||||
_alignvar(flt_t special_lj[4],64);
|
||||
flt_t **cutsq;
|
||||
flt_t cut_coulsq, cut_ljsq;
|
||||
flt_t cut_coul_innersq, cut_lj_innersq;
|
||||
typename IntelBuffers<flt_t,flt_t>::vec4_t **lj;
|
||||
|
||||
ForceConst() : _ntypes(0) {}
|
||||
~ForceConst() { set_ntypes(0,NULL,_cop); }
|
||||
|
||||
void set_ntypes(const int ntypes, Memory *memory, const int cop);
|
||||
|
||||
private:
|
||||
int _ntypes, _cop;
|
||||
Memory *_memory;
|
||||
};
|
||||
ForceConst<float> force_const_single;
|
||||
ForceConst<double> force_const_double;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: The 'package intel' command is required for /intel styles
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Intel varient of lj/charmm/coul/charmm expects lj cutoff<=coulombic
|
||||
|
||||
The intel accelerated version of the CHARMM style requires that the
|
||||
Lennard-Jones cutoff is not greater than the coulombic cutoff.
|
||||
|
||||
*/
|
|
@ -0,0 +1,42 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Markus Hohnerbach (RWTH)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pair_rebo_intel.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PairREBOIntel::PairREBOIntel(LAMMPS *lmp) : PairAIREBOIntel(lmp) {}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
global settings
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairREBOIntel::settings(int narg, char **arg)
|
||||
{
|
||||
if (narg != 0) error->all(FLERR,"Illegal pair_style command");
|
||||
|
||||
cutlj = 0.0;
|
||||
ljflag = torflag = 0;
|
||||
//
|
||||
// this one parameter for C-C interactions is different in REBO vs AIREBO
|
||||
// see Favata, Micheletti, Ryu, Pugno, Comp Phys Comm (2016)
|
||||
|
||||
PCCf_2_0 = 0.0;
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: Markus Hohnerbach (RWTH)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PAIR_CLASS
|
||||
|
||||
PairStyle(rebo/intel,PairREBOIntel)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PAIR_REBO_INTEL_H
|
||||
#define LMP_PAIR_REBO_INTEL_H
|
||||
|
||||
#include "pair_airebo_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class PairREBOIntel : public PairAIREBOIntel {
|
||||
public:
|
||||
PairREBOIntel(class LAMMPS *);
|
||||
virtual void settings(int, char **);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
|
@ -345,16 +345,17 @@ void PairSWIntel::eval(const int offload, const int vflag,
|
|||
if (jj < jnumhalf) ejnumhalf++;
|
||||
}
|
||||
}
|
||||
int ejnum_pad = ejnum;
|
||||
|
||||
while ( (ejnum_pad % pad_width) != 0) {
|
||||
tdelx[ejnum_pad] = (flt_t)0.0;
|
||||
tdely[ejnum_pad] = (flt_t)0.0;
|
||||
tdelz[ejnum_pad] = (flt_t)0.0;
|
||||
trsq[ejnum_pad] = p2[3].cutsq + (flt_t)1.0;
|
||||
tj[ejnum_pad] = nall;
|
||||
if (!ONETYPE) tjtype[ejnum_pad] = 0;
|
||||
ejnum_pad++;
|
||||
int ejrem = ejnum & (pad_width - 1);
|
||||
if (ejrem) ejrem = pad_width - ejrem;
|
||||
const int ejnum_pad = ejnum + ejrem;
|
||||
for (int jj = ejnum; jj < ejnum_pad; jj++) {
|
||||
tdelx[jj] = (flt_t)0.0;
|
||||
tdely[jj] = (flt_t)0.0;
|
||||
tdelz[jj] = (flt_t)0.0;
|
||||
trsq[jj] = p2[3].cutsq + (flt_t)1.0;
|
||||
tj[jj] = nall;
|
||||
if (!ONETYPE) tjtype[jj] = 0;
|
||||
}
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
|
|
Loading…
Reference in New Issue