USER-INTEL add-ons from Mike

This commit is contained in:
Steve Plimpton 2017-08-15 17:12:07 -06:00
parent 0b3f1b8a15
commit 1d4d2155a2
35 changed files with 9675 additions and 49 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -29,8 +29,10 @@ Bond Styles: fene, harmonic :l
Dihedral Styles: charmm, harmonic, opls :l
Fixes: nve, npt, nvt, nvt/sllod :l
Improper Styles: cvff, harmonic :l
Pair Styles: buck/coul/cut, buck/coul/long, buck, eam, gayberne,
charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, sw, tersoff :l
Pair Styles: airebo, airebo/morse, buck/coul/cut, buck/coul/long,
buck, eam, eam/alloy, eam/fs, gayberne, charmm/coul/charmm,
charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, rebo,
sw, tersoff :l
K-Space Styles: pppm, pppm/disp :l
:ule

View File

@ -7,10 +7,13 @@
:line
pair_style airebo command :h3
pair_style airebo/intel command :h3
pair_style airebo/omp command :h3
pair_style airebo/morse command :h3
pair_style airebo/morse/intel command :h3
pair_style airebo/morse/omp command :h3
pair_style rebo command :h3
pair_style rebo/intel command :h3
pair_style rebo/omp command :h3
[Syntax:]

View File

@ -7,6 +7,7 @@
:line
pair_style lj/charmm/coul/charmm command :h3
pair_style lj/charmm/coul/charmm/intel command :h3
pair_style lj/charmm/coul/charmm/omp command :h3
pair_style lj/charmm/coul/charmm/implicit command :h3
pair_style lj/charmm/coul/charmm/implicit/omp command :h3

View File

@ -14,6 +14,7 @@ pair_style eam/omp command :h3
pair_style eam/opt command :h3
pair_style eam/alloy command :h3
pair_style eam/alloy/gpu command :h3
pair_style eam/alloy/intel command :h3
pair_style eam/alloy/kk command :h3
pair_style eam/alloy/omp command :h3
pair_style eam/alloy/opt command :h3
@ -21,6 +22,7 @@ pair_style eam/cd command :h3
pair_style eam/cd/omp command :h3
pair_style eam/fs command :h3
pair_style eam/fs/gpu command :h3
pair_style eam/fs/intel command :h3
pair_style eam/fs/kk command :h3
pair_style eam/fs/omp command :h3
pair_style eam/fs/opt command :h3

View File

@ -14,7 +14,7 @@ SHFLAGS = -fPIC
DEPFLAGS = -M
LINK = mpiicpc
LINKFLAGS = -g -qopenmp $(OPTFLAGS)
LINKFLAGS = -qopenmp $(OPTFLAGS)
LIB = -ltbbmalloc
SIZE = size

View File

@ -7,7 +7,7 @@ SHELL = /bin/sh
# specify flags and libraries needed for your compiler
CC = mpicxx -cxx=icc
OPTFLAGS = -xAVX -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
-fno-alias -ansi-alias -restrict $(OPTFLAGS)
SHFLAGS = -fPIC

View File

@ -8,7 +8,7 @@ SHELL = /bin/sh
export OMPI_CXX = icc
CC = mpicxx
OPTFLAGS = -xAVX -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
-fno-alias -ansi-alias -restrict $(OPTFLAGS)
SHFLAGS = -fPIC

View File

@ -46,6 +46,7 @@ action npair_intel.h
action npair_intel.cpp
action intel_simd.h pair_sw_intel.cpp
action intel_intrinsics.h pair_tersoff_intel.cpp
action intel_intrinsics_airebo.h pair_airebo_intel.cpp
action verlet_lrt_intel.h pppm.cpp
action verlet_lrt_intel.cpp pppm.cpp

View File

@ -4,9 +4,9 @@
--------------------------------
W. Michael Brown (Intel) michael.w.brown at intel.com
Markus Hohnerbach (RWTH Aachen University)
William McDoniel (RWTH Aachen University)
Rodrigo Canales (RWTH Aachen University)
Markus H<>hnerbach (RWTH Aachen University)
Stan Moore (Sandia)
Ahmed E. Ismail (RWTH Aachen University)
Paolo Bientinesi (RWTH Aachen University)

View File

@ -8,6 +8,7 @@
# in.intel.sw - Silicon benchmark with Stillinger-Weber
# in.intel.tersoff - Silicon benchmark with Tersoff
# in.intel.water - Coarse-grain water benchmark using Stillinger-Weber
# in.intel.airebo - Polyethelene benchmark with AIREBO
#
#############################################################################
@ -24,6 +25,7 @@
# in.intel.sw - 132.4 161.9
# in.intel.tersoff - 83.3 101.1
# in.intel.water - 53.4 90.3
# in.intel.airebo - 7.3 11.8
#
#############################################################################

View File

@ -0,0 +1,47 @@
# AIREBO polyethelene benchmark
variable N index on # Newton Setting
variable w index 10 # Warmup Timesteps
variable t index 550 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
variable n index 0 # Use NUMA Mapping for Multi-Node
variable p index 0 # Use Power Measurement
variable x index 4
variable y index 2
variable z index 2
variable xx equal 17*$x
variable yy equal 16*$y
variable zz equal 2*$z
variable rr equal floor($t*$m)
variable root getenv LMP_ROOT
newton $N
if "$n > 0" then "processors * * * grid numa"
variable root getenv LMP_ROOT
units metal
atom_style atomic
read_data ${root}/examples/airebo/data.airebo
replicate ${xx} ${yy} ${zz}
neighbor 0.5 bin
neigh_modify delay 5 every 1
pair_style airebo 3.0 1 1
pair_coeff * * ${root}/potentials/CH.airebo C H
velocity all create 300.0 761341
fix 1 all nve
timestep 0.0005
thermo 50
if "$p > 0" then "run_style verlet/power"
if "$w > 0" then "run $w"
run ${rr}

View File

@ -5,7 +5,6 @@ variable w index 10 # Warmup Timesteps
variable t index 3100 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
variable n index 0 # Use NUMA Mapping for Multi-Node
variable b index 3 # Neighbor binsize
variable p index 0 # Use Power Measurement
variable x index 4

View File

@ -5,7 +5,6 @@ variable w index 10 # Warmup Timesteps
variable t index 520 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
variable n index 0 # Use NUMA Mapping for Multi-Node
variable b index 3 # Neighbor binsize
variable p index 0 # Use Power Measurement
variable c index 0 # 1 to use collectives for PPPM
variable d index 1 # 1 to use 'diff ad' for PPPM

View File

@ -30,6 +30,9 @@ IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
_off_map_listlocal = 0;
_ccachex = 0;
_ncache_alloc = 0;
_ncachetag = 0;
_cutneighsq = 0;
_cutneighghostsq = 0;
#ifdef _LMP_INTEL_OFFLOAD
_separate_buffers = 0;
_off_f = 0;
@ -447,12 +450,17 @@ void IntelBuffers<flt_t, acc_t>::free_ncache()
flt_t *ncachez = _ncachez;
int *ncachej = _ncachej;
int *ncachejtype = _ncachejtype;
int *ncachetag = _ncachetag;
#ifdef _LMP_INTEL_OFFLOAD
if (_off_ncache) {
#pragma offload_transfer target(mic:_cop) \
nocopy(ncachex,ncachey,ncachez,ncachej:alloc_if(0) free_if(1)) \
nocopy(ncachejtype:alloc_if(0) free_if(1))
if (ncachetag) {
#pragma offload_transfer target(mic:_cop) \
nocopy(ncachetag:alloc_if(0) free_if(1))
}
}
_off_ncache = 0;
#endif
@ -462,8 +470,10 @@ void IntelBuffers<flt_t, acc_t>::free_ncache()
lmp->memory->destroy(ncachez);
lmp->memory->destroy(ncachej);
lmp->memory->destroy(ncachejtype);
if (ncachetag)
lmp->memory->destroy(ncachetag);
_ncache_alloc = 0;
_ncachetag = 0;
}
}
@ -480,7 +490,7 @@ void IntelBuffers<flt_t, acc_t>::grow_ncache(const int off_flag,
const int vsize = _ncache_stride * nt;
if (_ncache_alloc) {
if (vsize > _ncache_alloc)
if (vsize > _ncache_alloc || (need_tag() && _ncachetag == 0))
free_ncache();
#ifdef _LMP_INTEL_OFFLOAD
else if (off_flag && _off_ncache == 0)
@ -495,6 +505,8 @@ void IntelBuffers<flt_t, acc_t>::grow_ncache(const int off_flag,
lmp->memory->create(_ncachez, vsize, "_ncachez");
lmp->memory->create(_ncachej, vsize, "_ncachej");
lmp->memory->create(_ncachejtype, vsize, "_ncachejtype");
if (need_tag())
lmp->memory->create(_ncachetag, vsize, "_ncachetag");
_ncache_alloc = vsize;
@ -513,6 +525,14 @@ void IntelBuffers<flt_t, acc_t>::grow_ncache(const int off_flag,
nocopy(ncachez,ncachej:length(vsize) alloc_if(1) free_if(0)) \
nocopy(ncachejtype:length(vsize) alloc_if(1) free_if(0))
}
int tsize = vsize;
if (!need_tag()) {
tsize = 16;
lmp->memory->create(_ncachetag, tsize, "_ncachetag");
}
int *ncachetag = _ncachetag;
#pragma offload_transfer target(mic:_cop) \
nocopy(ncachetag:length(tsize) alloc_if(1) free_if(0))
_off_ncache = 1;
}
#endif
@ -548,7 +568,8 @@ void IntelBuffers<flt_t, acc_t>::fdotr_reduce(const int nall,
/* ---------------------------------------------------------------------- */
template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes)
void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes,
const int use_ghost_cut)
{
if (ntypes != _ntypes) {
if (_ntypes > 0) {
@ -558,16 +579,34 @@ void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes)
#pragma offload_transfer target(mic:_cop) \
nocopy(cutneighsqo:alloc_if(0) free_if(1))
}
flt_t * cutneighghostsqo;
if (_cutneighghostsq && _off_threads > 0 && cutneighghostsqo != 0) {
cutneighghostsqo = _cutneighghostsq[0];
#pragma offload_transfer target(mic:_cop) \
nocopy(cutneighghostsqo:alloc_if(0) free_if(1))
}
#endif
lmp->memory->destroy(_cutneighsq);
if (_cutneighghostsq != 0) lmp->memory->destroy(_cutneighghostsq);
}
if (ntypes > 0) {
lmp->memory->create(_cutneighsq, ntypes, ntypes, "_cutneighsq");
if (use_ghost_cut)
lmp->memory->create(_cutneighghostsq, ntypes, ntypes,
"_cutneighghostsq");
#ifdef _LMP_INTEL_OFFLOAD
flt_t * cutneighsqo = _cutneighsq[0];
const int ntypes2 = ntypes * ntypes;
if (_off_threads > 0 && cutneighsqo != NULL) {
#pragma offload_transfer target(mic:_cop) \
nocopy(cutneighsqo:length(ntypes * ntypes) alloc_if(1) free_if(0))
nocopy(cutneighsqo:length(ntypes2) alloc_if(1) free_if(0))
}
if (use_ghost_cut) {
flt_t * cutneighghostsqo = _cutneighghostsq[0];
if (_off_threads > 0 && cutneighghostsqo != NULL) {
#pragma offload_transfer target(mic:_cop) \
nocopy(cutneighghostsqo:length(ntypes2) alloc_if(1) free_if(0))
}
}
#endif
}

View File

@ -109,12 +109,14 @@ class IntelBuffers {
void free_ncache();
void grow_ncache(const int off_flag, const int nthreads);
void grow_ncachetag(const int off_flag, const int nthreads);
inline int ncache_stride() { return _ncache_stride; }
inline flt_t * get_ncachex() { return _ncachex; }
inline flt_t * get_ncachey() { return _ncachey; }
inline flt_t * get_ncachez() { return _ncachez; }
inline int * get_ncachej() { return _ncachej; }
inline int * get_ncachejtype() { return _ncachejtype; }
inline int * get_ncachetag() { return _ncachetag; }
inline int get_max_nbors() {
int mn = lmp->neighbor->oneatom * sizeof(int) /
@ -131,7 +133,7 @@ class IntelBuffers {
_grow_nbor_list(list, nlocal, nthreads, offload_end, pack_width);
}
void set_ntypes(const int ntypes);
void set_ntypes(const int ntypes, const int use_ghost_cut = 0);
inline int * firstneigh(const NeighList *list) { return _list_alloc; }
inline int * cnumneigh(const NeighList *list) { return _cnumneigh; }
@ -162,6 +164,7 @@ class IntelBuffers {
inline void zero_ev()
{ for (int i = 0; i < 8; i++) _ev_global[i] = _ev_global_host[i] = 0.0; }
inline flt_t ** get_cutneighsq() { return _cutneighsq; }
inline flt_t ** get_cutneighghostsq() { return _cutneighghostsq; }
inline int get_off_threads() { return _off_threads; }
#ifdef _LMP_INTEL_OFFLOAD
inline void set_off_params(const int n, const int cop,
@ -274,13 +277,10 @@ class IntelBuffers {
used_ghost * sizeof(flt_t));
}
}
#endif
inline int need_tag() { return _need_tag; }
inline void need_tag(const int nt) { _need_tag = nt; }
#else
inline int need_tag() { return 0; }
inline void need_tag(const int nt) { }
#endif
double memory_usage(const int nthreads);
@ -298,7 +298,7 @@ class IntelBuffers {
int _list_alloc_atoms;
int *_list_alloc, *_cnumneigh, *_atombin, *_binpacked;
flt_t **_cutneighsq;
flt_t **_cutneighsq, **_cutneighghostsq;
int _ntypes;
int _ccache_stride;
@ -307,7 +307,10 @@ class IntelBuffers {
int _ncache_stride, _ncache_alloc;
flt_t *_ncachex, *_ncachey, *_ncachez;
int *_ncachej, *_ncachejtype;
int *_ncachej, *_ncachejtype, *_ncachetag;
int _need_tag, _host_nmax;
#ifdef LMP_USE_AVXCD
int _ccache_stride3;
acc_t * _ccachef;
@ -324,7 +327,6 @@ class IntelBuffers {
int *_off_map_special, *_off_map_nspecial, *_off_map_tag;
int *_off_map_numneigh;
bool _off_list_alloc;
int _need_tag, _host_nmax;
#endif
int _buf_size, _buf_local_size;

File diff suppressed because it is too large Load Diff

View File

@ -211,6 +211,8 @@ void NBinIntel::bin_atoms(IntelBuffers<flt_t,acc_t> * buffers) {
for (i = nall-1; i >= nlocal; i--) {
if (mask[i] & bitmask) {
ibin = coord2bin(atom->x[i]);
// Only necessary to store when neighboring ghost
atombin[i] = ibin;
bins[i] = binhead[ibin];
binhead[ibin] = i;
}
@ -222,14 +224,10 @@ void NBinIntel::bin_atoms(IntelBuffers<flt_t,acc_t> * buffers) {
binhead[ibin] = i;
}
} else {
for (i = nall-1; i >= nlocal; i--) {
for (i = nall-1; i >= 0; i--) {
ibin = coord2bin(atom->x[i]);
bins[i] = binhead[ibin];
binhead[ibin] = i;
}
for (i = nlocal-1; i >= 0; i--) {
ibin = coord2bin(atom->x[i]);
atombin[i]=ibin;
// Only necessary to store for ghost when neighboring ghost
atombin[i] = ibin;
bins[i] = binhead[ibin];
binhead[ibin] = i;
}

View File

@ -0,0 +1,593 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_full_bin_ghost_intel.h"
#include "neighbor.h"
#include "nstencil.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NPairFullBinGhostIntel::NPairFullBinGhostIntel(LAMMPS *lmp) : NPairIntel(lmp) {}
/* ----------------------------------------------------------------------
binned neighbor list construction for all neighbors
include neighbors of ghost atoms, but no "special neighbors" for ghosts
every neighbor pair appears in list of both atoms i and j
------------------------------------------------------------------------- */
void NPairFullBinGhostIntel::build(NeighList *list)
{
#ifdef _LMP_INTEL_OFFLOAD
if (_fix->offload_noghost())
error->all(FLERR,
"The 'ghost no' option cannot be used with this USER-INTEL pair style.");
#endif
if (nstencil > INTEL_MAX_STENCIL_CHECK)
error->all(FLERR, "Too many neighbor bins for USER-INTEL package.");
#ifdef _LMP_INTEL_OFFLOAD
if (exclude)
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
#endif
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
fbi(list, _fix->get_mixed_buffers());
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
fbi(list, _fix->get_double_buffers());
else
fbi(list, _fix->get_single_buffers());
_fix->stop_watch(TIME_HOST_NEIGHBOR);
}
/* ---------------------------------------------------------------------- */
template<class flt_t, class acc_t>
void NPairFullBinGhostIntel::fbi(NeighList * list,
IntelBuffers<flt_t,acc_t> * buffers)
{
const int nlocal = atom->nlocal;
const int nall = atom->nlocal + atom->nghost;
list->inum = atom->nlocal;
list->gnum = atom->nghost;
int host_start = _fix->host_start_neighbor();
const int off_end = _fix->offload_end_neighbor();
#ifdef _LMP_INTEL_OFFLOAD
if (off_end) grow_stencil();
if (_fix->full_host_list()) host_start = 0;
int offload_noghost = _fix->offload_noghost();
#endif
// only uses offload_end_neighbor to check whether we are doing offloading
// at all, no need to correct this later
buffers->grow_list(list, nall, comm->nthreads, off_end,
_fix->nbor_pack_width());
int need_ic = 0;
if (atom->molecular)
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
neighbor->cutneighmax);
if (need_ic) {
fbi<flt_t,acc_t,1>(1, list, buffers, 0, off_end);
fbi<flt_t,acc_t,1>(0, list, buffers, host_start, nlocal);
} else {
fbi<flt_t,acc_t,0>(1, list, buffers, 0, off_end);
fbi<flt_t,acc_t,0>(0, list, buffers, host_start, nlocal);
}
}
/* ---------------------------------------------------------------------- */
template<class flt_t, class acc_t, int need_ic>
void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
IntelBuffers<flt_t,acc_t> * buffers,
const int pstart, const int pend) {
if (pend-pstart == 0) return;
const int nall = atom->nlocal + atom->nghost;
int pad = 1;
int nall_t = nall;
const int aend = nall;
const int pack_width = _fix->nbor_pack_width();
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const firstneigh = buffers->firstneigh(list);
const int e_nall = nall_t;
const int molecular = atom->molecular;
int *ns = NULL;
tagint *s = NULL;
int tag_size = 0, special_size;
if (buffers->need_tag()) tag_size = e_nall;
if (molecular) {
s = atom->special[0];
ns = atom->nspecial[0];
special_size = aend;
} else {
s = &buffers->_special_holder;
ns = &buffers->_nspecial_holder;
special_size = 0;
}
const tagint * _noalias const special = s;
const int * _noalias const nspecial = ns;
const int maxspecial = atom->maxspecial;
const tagint * _noalias const tag = atom->tag;
int * _noalias const ilist = list->ilist;
int * _noalias numneigh = list->numneigh;
int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int nstencil = this->nstencil;
const int * _noalias const stencil = this->stencil;
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
const flt_t * _noalias const cutneighghostsq =
buffers->get_cutneighghostsq()[0];
const int ntypes = atom->ntypes + 1;
const int nlocal = atom->nlocal;
#ifndef _LMP_INTEL_OFFLOAD
int * const mask = atom->mask;
tagint * const molecule = atom->molecule;
#endif
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int moltemplate;
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
if (moltemplate)
error->all(FLERR,
"Can't use moltemplate with npair style full/bin/ghost/intel.");
int tnum;
int *overflow;
double *timer_compute;
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
timer_compute = _fix->off_watch_neighbor();
tnum = buffers->get_off_threads();
overflow = _fix->get_off_overflow_flag();
_fix->stop_watch(TIME_HOST_NEIGHBOR);
_fix->start_watch(TIME_OFFLOAD_LATENCY);
} else
#endif
{
tnum = comm->nthreads;
overflow = _fix->get_overflow_flag();
}
const int nthreads = tnum;
const int maxnbors = buffers->get_max_nbors();
int * _noalias const atombin = buffers->get_atombin();
const int * _noalias const binpacked = buffers->get_binpacked();
const int xperiodic = domain->xperiodic;
const int yperiodic = domain->yperiodic;
const int zperiodic = domain->zperiodic;
const flt_t xprd_half = domain->xprd_half;
const flt_t yprd_half = domain->yprd_half;
const flt_t zprd_half = domain->zprd_half;
flt_t * _noalias const ncachex = buffers->get_ncachex();
flt_t * _noalias const ncachey = buffers->get_ncachey();
flt_t * _noalias const ncachez = buffers->get_ncachez();
int * _noalias const ncachej = buffers->get_ncachej();
int * _noalias const ncachejtype = buffers->get_ncachejtype();
int * _noalias const ncachetag = buffers->get_ncachetag();
const int ncache_stride = buffers->ncache_stride();
const int mbinx = this->mbinx;
const int mbiny = this->mbiny;
const int mbinz = this->mbinz;
const int * const stencilxyz = &this->stencilxyz[0][0];
#ifdef _LMP_INTEL_OFFLOAD
const int * _noalias const binhead = this->binhead;
const int * _noalias const bins = this->bins;
const int cop = _fix->coprocessor_number();
const int separate_buffers = _fix->separate_buffers();
#pragma offload target(mic:cop) if(offload) \
in(x:length(e_nall+1) alloc_if(0) free_if(0)) \
in(tag:length(tag_size) alloc_if(0) free_if(0)) \
in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \
in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \
in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \
in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
in(cutneighghostsq:length(0) alloc_if(0) free_if(0)) \
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
in(numneigh:length(0) alloc_if(0) free_if(0)) \
in(ilist:length(0) alloc_if(0) free_if(0)) \
in(atombin:length(aend) alloc_if(0) free_if(0)) \
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
in(ncachex,ncachey,ncachez,ncachej:length(0) alloc_if(0) free_if(0)) \
in(ncachejtype,ncachetag:length(0) alloc_if(0) free_if(0)) \
in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \
in(separate_buffers,aend,nlocal,molecular,ntypes,mbinx,mbiny) \
in(mbinz,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \
in(stencilxyz:length(3*nstencil)) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(tag)
#endif
{
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime();
#endif
#ifdef _LMP_INTEL_OFFLOAD
overflow[LMP_LOCAL_MIN] = 0;
overflow[LMP_LOCAL_MAX] = aend - 1;
overflow[LMP_GHOST_MIN] = e_nall;
overflow[LMP_GHOST_MAX] = -1;
#endif
int nstencilp = 0;
int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL];
for (int k = 0; k < nstencil; k++) {
binstart[nstencilp] = stencil[k];
int end = stencil[k] + 1;
for (int kk = k + 1; kk < nstencil; kk++) {
if (stencil[kk-1]+1 == stencil[kk]) {
end++;
k++;
} else break;
}
binend[nstencilp] = end;
nstencilp++;
}
const int mbinyx = mbiny * mbinx;
#if defined(_OPENMP)
#pragma omp parallel
#endif
{
const int num = aend;
int tid, ifrom, ito;
const double balance_factor = 2.0;
const double ibalance_factor = 1.0 / balance_factor;
const int gnum = num - nlocal;
const int wlocal = static_cast<int>(ceil(balance_factor * nlocal));
const int snum = wlocal + gnum;
IP_PRE_omp_range_id(ifrom, ito, tid, snum, nthreads);
if (ifrom < wlocal) ifrom = static_cast<int>(ibalance_factor * ifrom);
else ifrom -= wlocal - nlocal;
if (ito < wlocal) ito = static_cast<int>(ibalance_factor * ito);
else ito -= wlocal - nlocal;
int e_ito = ito;
const int list_size = (e_ito + tid * 2 + 2) * maxnbors;
int which;
int pack_offset = maxnbors;
int ct = (ifrom + tid * 2) * maxnbors;
int *neighptr = firstneigh + ct;
const int obound = pack_offset + maxnbors * 2;
const int toffs = tid * ncache_stride;
flt_t * _noalias const tx = ncachex + toffs;
flt_t * _noalias const ty = ncachey + toffs;
flt_t * _noalias const tz = ncachez + toffs;
int * _noalias const tj = ncachej + toffs;
int * _noalias const tjtype = ncachejtype + toffs;
int * _noalias const ttag = ncachetag + toffs;
// loop over all atoms in other bins in stencil, store every pair
int istart, icount, ncount, oldbin = -9999999, lane, max_chunk;
for (int i = ifrom; i < ito; i++) {
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
const int itype = x[i].w;
const tagint itag = tag[i];
const int ioffset = ntypes * itype;
const int ibin = atombin[i];
if (ibin != oldbin) {
oldbin = ibin;
ncount = 0;
if (i < nlocal) {
for (int k = 0; k < nstencilp; k++) {
const int bstart = binhead[ibin + binstart[k]];
const int bend = binhead[ibin + binend[k]];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int jj = bstart; jj < bend; jj++)
tj[ncount++] = binpacked[jj];
}
} else {
const int zbin = ibin / mbinyx;
const int zrem = ibin % mbinyx;
const int ybin = zrem / mbinx;
const int xbin = zrem % mbinx;
for (int k = 0; k < nstencil; k++) {
const int xbin2 = xbin + stencilxyz[3 * k + 0];
const int ybin2 = ybin + stencilxyz[3 * k + 1];
const int zbin2 = zbin + stencilxyz[3 * k + 2];
if (xbin2 < 0 || xbin2 >= mbinx ||
ybin2 < 0 || ybin2 >= mbiny ||
zbin2 < 0 || zbin2 >= mbinz) continue;
const int bstart = binhead[ibin + stencil[k]];
const int bend = binhead[ibin + stencil[k] + 1];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int jj = bstart; jj < bend; jj++)
tj[ncount++] = binpacked[jj];
}
} // if i < nlocal
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int u = 0; u < ncount; u++) {
const int j = tj[u];
tx[u] = x[j].x;
ty[u] = x[j].y;
tz[u] = x[j].z;
tjtype[u] = x[j].w;
ttag[u] = tag[j];
}
} // if ibin != oldbin
// ---------------------- Loop over other bins
int n = maxnbors;
int n2 = n * 2;
int *neighptr2 = neighptr;
const flt_t * _noalias cutsq;
if (i < nlocal) cutsq = cutneighsq;
else cutsq = cutneighghostsq;
const int icp = i;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int u = 0; u < ncount; u++) {
int addme = 1;
int j = tj[u];
if (i == j) addme = 0;
// Cutoff Check
const flt_t delx = xtmp - tx[u];
const flt_t dely = ytmp - ty[u];
const flt_t delz = ztmp - tz[u];
const int jtype = tjtype[u];
const int jtag = ttag[u];
const flt_t rsq = delx * delx + dely * dely + delz * delz;
if (rsq > cutsq[ioffset + jtype]) addme = 0;
if (need_ic && icp < nlocal) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
j = -j - 1;
}
int flist = 0;
if (itag > jtag) {
if (((itag+jtag) & 1) == 0) flist = 1;
} else if (itag < jtag) {
if (((itag+jtag) & 1) == 1) flist = 1;
} else {
if (tz[u] < ztmp) flist = 1;
else if (tz[u] == ztmp && ty[u] < ytmp) flist = 1;
else if (tz[u] == ztmp && ty[u] == ytmp && tx[u] < xtmp)
flist = 1;
}
if (addme) {
if (flist)
neighptr2[n2++] = j;
else
neighptr[n++] = j;
}
} // for u
#ifndef _LMP_INTEL_OFFLOAD
if (exclude) {
int alln = n;
n = maxnbors;
for (int u = pack_offset; u < alln; u++) {
const int j = neighptr[u];
int pj = j;
if (need_ic)
if (pj < 0) pj = -j - 1;
const int jtype = x[pj].w;
if (exclusion(i,pj,itype,jtype,mask,molecule)) continue;
neighptr[n++] = j;
}
alln = n2;
n2 = maxnbors * 2;
for (int u = n2; u < alln; u++) {
const int j = neighptr[u];
int pj = j;
if (need_ic)
if (pj < 0) pj = -j - 1;
const int jtype = x[pj].w;
if (exclusion(i,pj,itype,jtype,mask,molecule)) continue;
neighptr[n2++] = j;
}
}
#endif
int ns = n - maxnbors;
int alln = n;
atombin[i] = ns;
n = 0;
for (int u = maxnbors; u < alln; u++)
neighptr[n++] = neighptr[u];
ns += n2 - maxnbors * 2;
for (int u = maxnbors * 2; u < n2; u++)
neighptr[n++] = neighptr[u];
if (ns > maxnbors) *overflow = 1;
ilist[i] = i;
cnumneigh[i] = ct;
numneigh[i] = ns;
ct += ns;
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
const int edge = ct & (alignb - 1);
if (edge) ct += alignb - edge;
neighptr = firstneigh + ct;
if (ct + obound > list_size) {
if (i < ito - 1) {
*overflow = 1;
ct = (ifrom + tid * 2) * maxnbors;
}
}
}
if (*overflow == 1)
for (int i = ifrom; i < ito; i++)
numneigh[i] = 0;
#ifdef _LMP_INTEL_OFFLOAD
int ghost_offset = 0, nall_offset = e_nall;
if (separate_buffers) {
for (int i = ifrom; i < ito; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
#if __INTEL_COMPILER+0 > 1499
#pragma vector aligned
#pragma simd
#endif
for (int jj = 0; jj < jnum; jj++) {
int j = jlist[jj];
if (need_ic && j < 0) j = -j - 1;
}
}
overflow[LMP_LOCAL_MIN] = 0;
overflow[LMP_LOCAL_MAX] = nlocal - 1;
overflow[LMP_GHOST_MIN] = nlocal;
overflow[LMP_GHOST_MAX] = e_nall - 1;
int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN];
if (nghost < 0) nghost = 0;
if (offload) {
ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1;
nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost;
} else {
ghost_offset = overflow[LMP_GHOST_MIN] - nlocal;
nall_offset = nlocal + nghost;
}
} // if separate_buffers
#endif
if (molecular) {
int ito_m = ito;
if (ito >= nlocal) ito_m = nlocal;
for (int i = ifrom; i < ito_m; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj];
if (need_ic && j < 0) {
which = 0;
jlist[jj] = -j - 1;
} else
ofind_special(which, special, nspecial, i, tag[j]);
#ifdef _LMP_INTEL_OFFLOAD
if (j >= nlocal) {
if (j == e_nall)
jlist[jj] = nall_offset;
else if (which)
jlist[jj] = (j-ghost_offset) ^ (which << SBBITS);
else jlist[jj]-=ghost_offset;
} else
#endif
if (which) jlist[jj] = j ^ (which << SBBITS);
}
} // for i
} // if molecular
#ifdef _LMP_INTEL_OFFLOAD
else if (separate_buffers) {
for (int i = ifrom; i < ito; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
int jj = 0;
#pragma vector aligned
#pragma simd
for (jj = 0; jj < jnum; jj++) {
if (jlist[jj] >= nlocal) {
if (jlist[jj] == e_nall) jlist[jj] = nall_offset;
else jlist[jj] -= ghost_offset;
}
}
}
}
#endif
} // end omp
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime() - *timer_compute;
#endif
} // end offload
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
_fix->stop_watch(TIME_OFFLOAD_LATENCY);
_fix->start_watch(TIME_HOST_NEIGHBOR);
for (int n = 0; n < aend; n++) {
ilist[n] = n;
numneigh[n] = 0;
}
} else {
for (int i = 0; i < aend; i++)
list->firstneigh[i] = firstneigh + cnumneigh[i];
if (separate_buffers) {
_fix->start_watch(TIME_PACK);
_fix->set_neighbor_host_sizes();
buffers->pack_sep_from_single(_fix->host_min_local(),
_fix->host_used_local(),
_fix->host_min_ghost(),
_fix->host_used_ghost());
_fix->stop_watch(TIME_PACK);
}
}
#else
#pragma vector aligned
#pragma simd
for (int i = 0; i < aend; i++)
list->firstneigh[i] = firstneigh + cnumneigh[i];
#endif
}

View File

@ -0,0 +1,55 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(full/bin/ghost/intel,
NPairFullBinGhostIntel,
NP_FULL | NP_BIN | NP_GHOST | NP_NEWTON | NP_NEWTOFF |
NP_ORTHO | NP_TRI | NP_INTEL)
#else
#ifndef LMP_NPAIR_FULL_BIN_GHOST_INTEL_H
#define LMP_NPAIR_FULL_BIN_GHOST_INTEL_H
#include "npair_intel.h"
namespace LAMMPS_NS {
class NPairFullBinGhostIntel : public NPairIntel {
public:
NPairFullBinGhostIntel(class LAMMPS *);
~NPairFullBinGhostIntel() {}
void build(class NeighList *);
private:
template<class flt_t, class acc_t>
void fbi(NeighList * list, IntelBuffers<flt_t,acc_t> * buffers);
template<class flt_t, class acc_t, int need_ic>
void fbi(const int offload, NeighList * list,
IntelBuffers<flt_t,acc_t> * buffers,
const int astart, const int aend);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -143,6 +143,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
flt_t * _noalias const ncachez = buffers->get_ncachez();
int * _noalias const ncachej = buffers->get_ncachej();
int * _noalias const ncachejtype = buffers->get_ncachejtype();
int * _noalias const ncachetag = buffers->get_ncachetag();
const int ncache_stride = buffers->ncache_stride();
#ifdef _LMP_INTEL_OFFLOAD
@ -165,7 +166,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
in(atombin:length(aend) alloc_if(0) free_if(0)) \
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
in(ncachex,ncachey,ncachez,ncachej:length(0) alloc_if(0) free_if(0)) \
in(ncachejtype:length(0) alloc_if(0) free_if(0)) \
in(ncachejtype,ncachetag:length(0) alloc_if(0) free_if(0)) \
in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \
in(pad_width,offload_end,separate_buffers,astart,aend,nlocal,molecular) \
in(ntypes,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \
@ -222,7 +223,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
ito += astart;
int e_ito = ito;
if (THREE && ito == num) {
int imod = ito % pack_width;
int imod = ito & (pack_width - 1);
if (imod) e_ito += pack_width - imod;
}
const int list_size = (e_ito + tid * 2 + 2) * maxnbors;
@ -241,6 +242,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
flt_t * _noalias const tz = ncachez + toffs;
int * _noalias const tj = ncachej + toffs;
int * _noalias const tjtype = ncachejtype + toffs;
int * _noalias const ttag = ncachetag + toffs;
flt_t * _noalias itx;
flt_t * _noalias ity;
@ -287,13 +289,14 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
ty[u] = x[j].y;
tz[u] = x[j].z;
tjtype[u] = x[j].w;
if (THREE) ttag[u] = tag[j];
}
if (FULL == 0 || TRI == 1) {
icount = 0;
istart = ncount;
const int alignb = INTEL_DATA_ALIGN / sizeof(int);
int nedge = istart % alignb;
int nedge = istart & (alignb - 1);
if (nedge) istart + (alignb - nedge);
itx = tx + istart;
ity = ty + istart;
@ -343,7 +346,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
// i bin (half) check and offload ghost check
if (j < nlocal) {
const int ijmod = (i + j) % 2;
const int ijmod = (i + j) & 1;
if (i > j) {
if (ijmod == 0) addme = 0;
} else if (i < j) {
@ -424,8 +427,6 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
}
#endif
int pj;
if (THREE) pj = j;
if (need_ic) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
@ -434,12 +435,12 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
}
if (THREE) {
const int jtag = tag[pj];
const int jtag = ttag[u];
int flist = 0;
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) flist = 1;
if (((itag+jtag) & 1) == 0) flist = 1;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) flist = 1;
if (((itag+jtag) & 1) == 1) flist = 1;
} else {
if (tz[u] < ztmp) flist = 1;
else if (tz[u] == ztmp && ty[u] < ytmp) flist = 1;
@ -512,7 +513,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
cnumneigh[i] += lane;
numneigh[i] = ns;
} else {
int edge = (n % pad_width);
int edge = n & (pad_width - 1);
if (edge) {
const int pad_end = n + (pad_width - edge);
#if defined(LMP_SIMD_COMPILER)
@ -532,7 +533,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
if (lane == pack_width) {
ct += max_chunk * pack_width;
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
const int edge = (ct % alignb);
const int edge = ct & (alignb - 1);
if (edge) ct += alignb - edge;
neighptr = firstneigh + ct;
max_chunk = 0;
@ -548,7 +549,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
} else {
ct += n;
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
const int edge = (ct % alignb);
const int edge = ct & (alignb - 1);
if (edge) ct += alignb - edge;
neighptr = firstneigh + ct;
if (ct + obound > list_size) {

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,110 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Markus Hohnerbach (RWTH)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(airebo/intel,PairAIREBOIntel)
#else
#ifndef LMP_PAIR_AIREBO_INTEL_H
#define LMP_PAIR_AIREBO_INTEL_H
#include "pair.h"
#include "fix_intel.h"
#include "pair_airebo.h"
//#include "airebo_common.h"
namespace LAMMPS_NS {
template<class flt_t, class acc_t>
struct PairAIREBOIntelParam;
class PairAIREBOIntel : public PairAIREBO {
public:
PairAIREBOIntel(class LAMMPS *);
virtual ~PairAIREBOIntel();
virtual void compute(int, int);
virtual void init_style();
protected:
template <class flt_t, class acc_t>
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers);
template <int EVFLAG, int EFLAG, class flt_t, class acc_t>
void eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> * buffers,
const int astart, const int aend);
template <class flt_t, class acc_t>
void pack_force_const(IntelBuffers<flt_t,acc_t> * buffers);
template <class flt_t, class acc_t>
PairAIREBOIntelParam<flt_t,acc_t> get_param();
FixIntel * fix;
int _cop;
int * REBO_cnumneigh;
int * REBO_num_skin;
int * REBO_list_data;
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Incorrect args for pair coefficients
Self-explanatory. Check the input script or data file.
E: Pair style AIREBO requires atom IDs
This is a requirement to use the AIREBO potential.
E: Pair style AIREBO requires newton pair on
See the newton command. This is a restriction to use the AIREBO
potential.
E: All pair coeffs are not set
All pair coefficients must be set in the data file or by the
pair_coeff command before running a simulation.
E: Neighbor list overflow, boost neigh_modify one
There are too many neighbors of a single atom. Use the neigh_modify
command to increase the max number of neighbors allowed for one atom.
You may also want to boost the page size.
E: Cannot open AIREBO potential file %s
The specified AIREBO potential file cannot be opened. Check that the
path and name are correct.
*/

View File

@ -0,0 +1,37 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Markus Hohnerbach (RWTH)
------------------------------------------------------------------------- */
#include "pair_airebo_morse_intel.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
PairAIREBOMorseIntel::PairAIREBOMorseIntel(LAMMPS *lmp)
: PairAIREBOIntel(lmp) {}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
void PairAIREBOMorseIntel::settings(int narg, char **arg)
{
PairAIREBOIntel::settings(narg,arg);
morseflag = 1;
}

View File

@ -0,0 +1,40 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Markus Hohnerbach (RWTH)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(airebo/morse/intel,PairAIREBOMorseIntel)
#else
#ifndef LMP_PAIR_AIREBO_MORSE_INTEL_H
#define LMP_PAIR_AIREBO_MORSE_INTEL_H
#include "pair_airebo_intel.h"
namespace LAMMPS_NS {
class PairAIREBOMorseIntel : public PairAIREBOIntel {
public:
PairAIREBOMorseIntel(class LAMMPS *);
virtual void settings(int, char **);
};
}
#endif
#endif

View File

@ -0,0 +1,326 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Stephen Foiles (SNL), Murray Daw (SNL)
------------------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pair_eam_alloy_intel.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
#define MAXLINE 1024
/* ---------------------------------------------------------------------- */
PairEAMAlloyIntel::PairEAMAlloyIntel(LAMMPS *lmp) : PairEAMIntel(lmp)
{
one_coeff = 1;
}
/* ----------------------------------------------------------------------
set coeffs for one or more type pairs
read DYNAMO setfl file
------------------------------------------------------------------------- */
void PairEAMAlloyIntel::coeff(int narg, char **arg)
{
int i,j;
if (!allocated) allocate();
if (narg != 3 + atom->ntypes)
error->all(FLERR,"Incorrect args for pair coefficients");
// insure I,J args are * *
if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
error->all(FLERR,"Incorrect args for pair coefficients");
// read EAM setfl file
if (setfl) {
for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i];
delete [] setfl->elements;
delete [] setfl->mass;
memory->destroy(setfl->frho);
memory->destroy(setfl->rhor);
memory->destroy(setfl->z2r);
delete setfl;
}
setfl = new Setfl();
read_file(arg[2]);
// read args that map atom types to elements in potential file
// map[i] = which element the Ith atom type is, -1 if NULL
for (i = 3; i < narg; i++) {
if (strcmp(arg[i],"NULL") == 0) {
map[i-2] = -1;
continue;
}
for (j = 0; j < setfl->nelements; j++)
if (strcmp(arg[i],setfl->elements[j]) == 0) break;
if (j < setfl->nelements) map[i-2] = j;
else error->all(FLERR,"No matching element in EAM potential file");
}
// clear setflag since coeff() called once with I,J = * *
int n = atom->ntypes;
for (i = 1; i <= n; i++)
for (j = i; j <= n; j++)
setflag[i][j] = 0;
// set setflag i,j for type pairs where both are mapped to elements
// set mass of atom type if i = j
int count = 0;
for (i = 1; i <= n; i++) {
for (j = i; j <= n; j++) {
if (map[i] >= 0 && map[j] >= 0) {
setflag[i][j] = 1;
if (i == j) atom->set_mass(FLERR,i,setfl->mass[map[i]]);
count++;
}
scale[i][j] = 1.0;
}
}
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
}
/* ----------------------------------------------------------------------
read a multi-element DYNAMO setfl file
------------------------------------------------------------------------- */
void PairEAMAlloyIntel::read_file(char *filename)
{
Setfl *file = setfl;
// open potential file
int me = comm->me;
FILE *fptr;
char line[MAXLINE];
if (me == 0) {
fptr = force->open_potential(filename);
if (fptr == NULL) {
char str[128];
sprintf(str,"Cannot open EAM potential file %s",filename);
error->one(FLERR,str);
}
}
// read and broadcast header
// extract element names from nelements line
int n;
if (me == 0) {
fgets(line,MAXLINE,fptr);
fgets(line,MAXLINE,fptr);
fgets(line,MAXLINE,fptr);
fgets(line,MAXLINE,fptr);
n = strlen(line) + 1;
}
MPI_Bcast(&n,1,MPI_INT,0,world);
MPI_Bcast(line,n,MPI_CHAR,0,world);
sscanf(line,"%d",&file->nelements);
int nwords = atom->count_words(line);
if (nwords != file->nelements + 1)
error->all(FLERR,"Incorrect element names in EAM potential file");
char **words = new char*[file->nelements+1];
nwords = 0;
strtok(line," \t\n\r\f");
while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
file->elements = new char*[file->nelements];
for (int i = 0; i < file->nelements; i++) {
n = strlen(words[i]) + 1;
file->elements[i] = new char[n];
strcpy(file->elements[i],words[i]);
}
delete [] words;
if (me == 0) {
fgets(line,MAXLINE,fptr);
sscanf(line,"%d %lg %d %lg %lg",
&file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
}
MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
MPI_Bcast(&file->nr,1,MPI_INT,0,world);
MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
file->mass = new double[file->nelements];
memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho");
memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor");
memory->create(file->z2r,file->nelements,file->nelements,file->nr+1,
"pair:z2r");
int i,j,tmp;
for (i = 0; i < file->nelements; i++) {
if (me == 0) {
fgets(line,MAXLINE,fptr);
sscanf(line,"%d %lg",&tmp,&file->mass[i]);
}
MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]);
MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world);
}
for (i = 0; i < file->nelements; i++)
for (j = 0; j <= i; j++) {
if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
}
// close the potential file
if (me == 0) fclose(fptr);
}
/* ----------------------------------------------------------------------
copy read-in setfl potential to standard array format
------------------------------------------------------------------------- */
void PairEAMAlloyIntel::file2array()
{
int i,j,m,n;
int ntypes = atom->ntypes;
// set function params directly from setfl file
nrho = setfl->nrho;
nr = setfl->nr;
drho = setfl->drho;
dr = setfl->dr;
rhomax = (nrho-1) * drho;
// ------------------------------------------------------------------
// setup frho arrays
// ------------------------------------------------------------------
// allocate frho arrays
// nfrho = # of setfl elements + 1 for zero array
nfrho = setfl->nelements + 1;
memory->destroy(frho);
memory->create(frho,nfrho,nrho+1,"pair:frho");
// copy each element's frho to global frho
for (i = 0; i < setfl->nelements; i++)
for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m];
// add extra frho of zeroes for non-EAM types to point to (pair hybrid)
// this is necessary b/c fp is still computed for non-EAM atoms
for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
// type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
// if atom type doesn't point to element (non-EAM atom in pair hybrid)
// then map it to last frho array of zeroes
for (i = 1; i <= ntypes; i++)
if (map[i] >= 0) type2frho[i] = map[i];
else type2frho[i] = nfrho-1;
// ------------------------------------------------------------------
// setup rhor arrays
// ------------------------------------------------------------------
// allocate rhor arrays
// nrhor = # of setfl elements
nrhor = setfl->nelements;
memory->destroy(rhor);
memory->create(rhor,nrhor,nr+1,"pair:rhor");
// copy each element's rhor to global rhor
for (i = 0; i < setfl->nelements; i++)
for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m];
// type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
// for setfl files, I,J mapping only depends on I
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
for (i = 1; i <= ntypes; i++)
for (j = 1; j <= ntypes; j++)
type2rhor[i][j] = map[i];
// ------------------------------------------------------------------
// setup z2r arrays
// ------------------------------------------------------------------
// allocate z2r arrays
// nz2r = N*(N+1)/2 where N = # of setfl elements
nz2r = setfl->nelements * (setfl->nelements+1) / 2;
memory->destroy(z2r);
memory->create(z2r,nz2r,nr+1,"pair:z2r");
// copy each element pair z2r to global z2r, only for I >= J
n = 0;
for (i = 0; i < setfl->nelements; i++)
for (j = 0; j <= i; j++) {
for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m];
n++;
}
// type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
// set of z2r arrays only fill lower triangular Nelement matrix
// value = n = sum over rows of lower-triangular matrix until reach irow,icol
// swap indices when irow < icol to stay lower triangular
// if map = -1 (non-EAM atom in pair hybrid):
// type2z2r is not used by non-opt
// but set type2z2r to 0 since accessed by opt
int irow,icol;
for (i = 1; i <= ntypes; i++) {
for (j = 1; j <= ntypes; j++) {
irow = map[i];
icol = map[j];
if (irow == -1 || icol == -1) {
type2z2r[i][j] = 0;
continue;
}
if (irow < icol) {
irow = map[j];
icol = map[i];
}
n = 0;
for (m = 0; m < irow; m++) n += m + 1;
n += icol;
type2z2r[i][j] = n;
}
}
}

View File

@ -0,0 +1,43 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(eam/alloy/intel,PairEAMAlloyIntel)
#else
#ifndef LMP_PAIR_EAM_ALLOY_INTEL_H
#define LMP_PAIR_EAM_ALLOY_INTEL_H
#include "pair_eam_intel.h"
namespace LAMMPS_NS {
// need virtual public b/c of how eam/alloy/opt inherits from it
class PairEAMAlloyIntel : virtual public PairEAMIntel {
public:
PairEAMAlloyIntel(class LAMMPS *);
virtual ~PairEAMAlloyIntel() {}
void coeff(int, char **);
protected:
void read_file(char *);
void file2array();
};
}
#endif
#endif

View File

@ -0,0 +1,335 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Tim Lau (MIT)
------------------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pair_eam_fs_intel.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
#define MAXLINE 1024
/* ---------------------------------------------------------------------- */
PairEAMFSIntel::PairEAMFSIntel(LAMMPS *lmp) : PairEAMIntel(lmp)
{
one_coeff = 1;
}
/* ----------------------------------------------------------------------
set coeffs for one or more type pairs
read EAM Finnis-Sinclair file
------------------------------------------------------------------------- */
void PairEAMFSIntel::coeff(int narg, char **arg)
{
int i,j;
if (!allocated) allocate();
if (narg != 3 + atom->ntypes)
error->all(FLERR,"Incorrect args for pair coefficients");
// insure I,J args are * *
if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
error->all(FLERR,"Incorrect args for pair coefficients");
// read EAM Finnis-Sinclair file
if (fs) {
for (i = 0; i < fs->nelements; i++) delete [] fs->elements[i];
delete [] fs->elements;
delete [] fs->mass;
memory->destroy(fs->frho);
memory->destroy(fs->rhor);
memory->destroy(fs->z2r);
delete fs;
}
fs = new Fs();
read_file(arg[2]);
// read args that map atom types to elements in potential file
// map[i] = which element the Ith atom type is, -1 if NULL
for (i = 3; i < narg; i++) {
if (strcmp(arg[i],"NULL") == 0) {
map[i-2] = -1;
continue;
}
for (j = 0; j < fs->nelements; j++)
if (strcmp(arg[i],fs->elements[j]) == 0) break;
if (j < fs->nelements) map[i-2] = j;
else error->all(FLERR,"No matching element in EAM potential file");
}
// clear setflag since coeff() called once with I,J = * *
int n = atom->ntypes;
for (i = 1; i <= n; i++)
for (j = i; j <= n; j++)
setflag[i][j] = 0;
// set setflag i,j for type pairs where both are mapped to elements
// set mass of atom type if i = j
int count = 0;
for (i = 1; i <= n; i++) {
for (j = i; j <= n; j++) {
if (map[i] >= 0 && map[j] >= 0) {
setflag[i][j] = 1;
if (i == j) atom->set_mass(FLERR,i,fs->mass[map[i]]);
count++;
}
scale[i][j] = 1.0;
}
}
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
}
/* ----------------------------------------------------------------------
read a multi-element DYNAMO setfl file
------------------------------------------------------------------------- */
void PairEAMFSIntel::read_file(char *filename)
{
Fs *file = fs;
// open potential file
int me = comm->me;
FILE *fptr;
char line[MAXLINE];
if (me == 0) {
fptr = force->open_potential(filename);
if (fptr == NULL) {
char str[128];
sprintf(str,"Cannot open EAM potential file %s",filename);
error->one(FLERR,str);
}
}
// read and broadcast header
// extract element names from nelements line
int n;
if (me == 0) {
fgets(line,MAXLINE,fptr);
fgets(line,MAXLINE,fptr);
fgets(line,MAXLINE,fptr);
fgets(line,MAXLINE,fptr);
n = strlen(line) + 1;
}
MPI_Bcast(&n,1,MPI_INT,0,world);
MPI_Bcast(line,n,MPI_CHAR,0,world);
sscanf(line,"%d",&file->nelements);
int nwords = atom->count_words(line);
if (nwords != file->nelements + 1)
error->all(FLERR,"Incorrect element names in EAM potential file");
char **words = new char*[file->nelements+1];
nwords = 0;
strtok(line," \t\n\r\f");
while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
file->elements = new char*[file->nelements];
for (int i = 0; i < file->nelements; i++) {
n = strlen(words[i]) + 1;
file->elements[i] = new char[n];
strcpy(file->elements[i],words[i]);
}
delete [] words;
if (me == 0) {
fgets(line,MAXLINE,fptr);
sscanf(line,"%d %lg %d %lg %lg",
&file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
}
MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
MPI_Bcast(&file->nr,1,MPI_INT,0,world);
MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
file->mass = new double[file->nelements];
memory->create(file->frho,file->nelements,file->nrho+1,
"pair:frho");
memory->create(file->rhor,file->nelements,file->nelements,
file->nr+1,"pair:rhor");
memory->create(file->z2r,file->nelements,file->nelements,
file->nr+1,"pair:z2r");
int i,j,tmp;
for (i = 0; i < file->nelements; i++) {
if (me == 0) {
fgets(line,MAXLINE,fptr);
sscanf(line,"%d %lg",&tmp,&file->mass[i]);
}
MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
for (j = 0; j < file->nelements; j++) {
if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]);
MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world);
}
}
for (i = 0; i < file->nelements; i++)
for (j = 0; j <= i; j++) {
if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
}
// close the potential file
if (me == 0) fclose(fptr);
}
/* ----------------------------------------------------------------------
copy read-in setfl potential to standard array format
------------------------------------------------------------------------- */
void PairEAMFSIntel::file2array()
{
int i,j,m,n;
int ntypes = atom->ntypes;
// set function params directly from fs file
nrho = fs->nrho;
nr = fs->nr;
drho = fs->drho;
dr = fs->dr;
rhomax = (nrho-1) * drho;
// ------------------------------------------------------------------
// setup frho arrays
// ------------------------------------------------------------------
// allocate frho arrays
// nfrho = # of fs elements + 1 for zero array
nfrho = fs->nelements + 1;
memory->destroy(frho);
memory->create(frho,nfrho,nrho+1,"pair:frho");
// copy each element's frho to global frho
for (i = 0; i < fs->nelements; i++)
for (m = 1; m <= nrho; m++) frho[i][m] = fs->frho[i][m];
// add extra frho of zeroes for non-EAM types to point to (pair hybrid)
// this is necessary b/c fp is still computed for non-EAM atoms
for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
// type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
// if atom type doesn't point to element (non-EAM atom in pair hybrid)
// then map it to last frho array of zeroes
for (i = 1; i <= ntypes; i++)
if (map[i] >= 0) type2frho[i] = map[i];
else type2frho[i] = nfrho-1;
// ------------------------------------------------------------------
// setup rhor arrays
// ------------------------------------------------------------------
// allocate rhor arrays
// nrhor = square of # of fs elements
nrhor = fs->nelements * fs->nelements;
memory->destroy(rhor);
memory->create(rhor,nrhor,nr+1,"pair:rhor");
// copy each element pair rhor to global rhor
n = 0;
for (i = 0; i < fs->nelements; i++)
for (j = 0; j < fs->nelements; j++) {
for (m = 1; m <= nr; m++) rhor[n][m] = fs->rhor[i][j][m];
n++;
}
// type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
// for fs files, there is a full NxN set of rhor arrays
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
for (i = 1; i <= ntypes; i++)
for (j = 1; j <= ntypes; j++)
type2rhor[i][j] = map[i] * fs->nelements + map[j];
// ------------------------------------------------------------------
// setup z2r arrays
// ------------------------------------------------------------------
// allocate z2r arrays
// nz2r = N*(N+1)/2 where N = # of fs elements
nz2r = fs->nelements * (fs->nelements+1) / 2;
memory->destroy(z2r);
memory->create(z2r,nz2r,nr+1,"pair:z2r");
// copy each element pair z2r to global z2r, only for I >= J
n = 0;
for (i = 0; i < fs->nelements; i++)
for (j = 0; j <= i; j++) {
for (m = 1; m <= nr; m++) z2r[n][m] = fs->z2r[i][j][m];
n++;
}
// type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
// set of z2r arrays only fill lower triangular Nelement matrix
// value = n = sum over rows of lower-triangular matrix until reach irow,icol
// swap indices when irow < icol to stay lower triangular
// if map = -1 (non-EAM atom in pair hybrid):
// type2z2r is not used by non-opt
// but set type2z2r to 0 since accessed by opt
int irow,icol;
for (i = 1; i <= ntypes; i++) {
for (j = 1; j <= ntypes; j++) {
irow = map[i];
icol = map[j];
if (irow == -1 || icol == -1) {
type2z2r[i][j] = 0;
continue;
}
if (irow < icol) {
irow = map[j];
icol = map[i];
}
n = 0;
for (m = 0; m < irow; m++) n += m + 1;
n += icol;
type2z2r[i][j] = n;
}
}
}

View File

@ -0,0 +1,43 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(eam/fs/intel,PairEAMFSIntel)
#else
#ifndef LMP_PAIR_EAM_FS_INTEL_H
#define LMP_PAIR_EAM_FS_INTEL_H
#include "pair_eam_intel.h"
namespace LAMMPS_NS {
// need virtual public b/c of how eam/fs/opt inherits from it
class PairEAMFSIntel : virtual public PairEAMIntel {
public:
PairEAMFSIntel(class LAMMPS *);
virtual ~PairEAMFSIntel() {}
void coeff(int, char **);
protected:
void read_file(char *);
void file2array();
};
}
#endif
#endif

View File

@ -428,7 +428,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
} else
multiple_forms = true;
}
const int edge = (packed_j % pad_width);
const int edge = packed_j & (pad_width - 1);
if (edge) {
const int packed_end = packed_j + (pad_width - edge);
#if defined(LMP_SIMD_COMPILER)

View File

@ -0,0 +1,595 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include <math.h>
#include "pair_lj_charmm_coul_charmm_intel.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "group.h"
#include "memory.h"
#include "modify.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "memory.h"
#include "suffix.h"
using namespace LAMMPS_NS;
#define LJ_T typename IntelBuffers<flt_t,flt_t>::vec4_t
/* ---------------------------------------------------------------------- */
PairLJCharmmCoulCharmmIntel::PairLJCharmmCoulCharmmIntel(LAMMPS *lmp) :
PairLJCharmmCoulCharmm(lmp)
{
suffix_flag |= Suffix::INTEL;
}
/* ---------------------------------------------------------------------- */
PairLJCharmmCoulCharmmIntel::~PairLJCharmmCoulCharmmIntel()
{
}
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmIntel::compute(int eflag, int vflag)
{
if (fix->precision()==FixIntel::PREC_MODE_MIXED)
compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
force_const_single);
else if (fix->precision()==FixIntel::PREC_MODE_DOUBLE)
compute<double,double>(eflag, vflag, fix->get_double_buffers(),
force_const_double);
else
compute<float,float>(eflag, vflag, fix->get_single_buffers(),
force_const_single);
fix->balance_stamp();
vflag_fdotr = 0;
}
template <class flt_t, class acc_t>
void PairLJCharmmCoulCharmmIntel::compute(int eflag, int vflag,
IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = vflag_fdotr = 0;
const int inum = list->inum;
const int nthreads = comm->nthreads;
const int host_start = fix->host_start_pair();
const int offload_end = fix->offload_end_pair();
const int ago = neighbor->ago;
if (ago != 0 && fix->separate_buffers() == 0) {
fix->start_watch(TIME_PACK);
int packthreads;
if (nthreads > INTEL_HTHREADS) packthreads = nthreads;
else packthreads = 1;
#if defined(_OPENMP)
#pragma omp parallel if(packthreads > 1)
#endif
{
int ifrom, ito, tid;
IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal+atom->nghost,
packthreads, sizeof(ATOM_T));
buffers->thr_pack(ifrom,ito,ago);
}
fix->stop_watch(TIME_PACK);
}
// -------------------- Regular version
int ovflag = 0;
if (vflag_fdotr) ovflag = 2;
else if (vflag) ovflag = 1;
if (eflag) {
if (force->newton_pair) {
eval<1,1>(1, ovflag, buffers, fc, 0, offload_end);
eval<1,1>(0, ovflag, buffers, fc, host_start, inum);
} else {
eval<1,0>(1, ovflag, buffers, fc, 0, offload_end);
eval<1,0>(0, ovflag, buffers, fc, host_start, inum);
}
} else {
if (force->newton_pair) {
eval<0,1>(1, ovflag, buffers, fc, 0, offload_end);
eval<0,1>(0, ovflag, buffers, fc, host_start, inum);
} else {
eval<0,0>(1, ovflag, buffers, fc, 0, offload_end);
eval<0,0>(0, ovflag, buffers, fc, host_start, inum);
}
}
}
/* ---------------------------------------------------------------------- */
template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc,
const int astart, const int aend)
{
const int inum = aend - astart;
if (inum == 0) return;
int nlocal, nall, minlocal;
fix->get_buffern(offload, nlocal, nall, minlocal);
const int ago = neighbor->ago;
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
ATOM_T * _noalias const x = buffers->get_x(offload);
flt_t * _noalias const q = buffers->get_q(offload);
const int * _noalias const numneigh = list->numneigh;
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int * _noalias const firstneigh = buffers->firstneigh(list);
const flt_t * _noalias const special_coul = fc.special_coul;
const flt_t * _noalias const special_lj = fc.special_lj;
const flt_t qqrd2e = force->qqrd2e;
const flt_t inv_denom_lj = (flt_t)1.0/denom_lj;
const flt_t inv_denom_coul = (flt_t)1.0/denom_coul;
const flt_t * _noalias const cutsq = fc.cutsq[0];
const LJ_T * _noalias const lj = fc.lj[0];
const flt_t cut_ljsq = fc.cut_ljsq;
const flt_t cut_lj_innersq = fc.cut_lj_innersq;
const flt_t cut_coul_innersq = fc.cut_coul_innersq;
const flt_t cut_coulsq = fc.cut_coulsq;
const int ntypes = atom->ntypes + 1;
const int eatom = this->eflag_atom;
flt_t * _noalias const ccachex = buffers->get_ccachex();
flt_t * _noalias const ccachey = buffers->get_ccachey();
flt_t * _noalias const ccachez = buffers->get_ccachez();
flt_t * _noalias const ccachew = buffers->get_ccachew();
int * _noalias const ccachei = buffers->get_ccachei();
int * _noalias const ccachej = buffers->get_ccachej();
const int ccache_stride = _ccache_stride;
// Determine how much data to transfer
int x_size, q_size, f_stride, ev_size, separate_flag;
IP_PRE_get_transfern(ago, NEWTON_PAIR, EFLAG, vflag,
buffers, offload, fix, separate_flag,
x_size, q_size, ev_size, f_stride);
int tc;
FORCE_T * _noalias f_start;
acc_t * _noalias ev_global;
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
const int nthreads = tc;
#ifdef _LMP_INTEL_OFFLOAD
int *overflow = fix->get_off_overflow_flag();
double *timer_compute = fix->off_watch_pair();
if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY);
#pragma offload target(mic:_cop) if(offload) \
in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \
in(cutsq,lj:length(0) alloc_if(0) free_if(0)) \
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
in(numneigh:length(0) alloc_if(0) free_if(0)) \
in(x:length(x_size) alloc_if(0) free_if(0)) \
in(q:length(q_size) alloc_if(0) free_if(0)) \
in(overflow:length(0) alloc_if(0) free_if(0)) \
in(ccachex,ccachey,ccachez,ccachew:length(0) alloc_if(0) free_if(0)) \
in(ccachei,ccachej:length(0) alloc_if(0) free_if(0)) \
in(ccache_stride,nthreads,qqrd2e,inum,nall,ntypes,cut_coulsq) \
in(vflag,eatom,f_stride,separate_flag,offload) \
in(astart,cut_ljsq,cut_lj_innersq,nlocal,inv_denom_lj,minlocal) \
in(inv_denom_coul,cut_coul_innersq) \
out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(f_start)
#endif
{
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime();
#endif
IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
f_stride, x, q);
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
if (EFLAG) oevdwl = oecoul = (acc_t)0;
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
// loop over neighbors of my atoms
#if defined(_OPENMP)
#pragma omp parallel reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
int iifrom, iip, iito, tid;
IP_PRE_omp_stride_id(iifrom, iip, iito, tid, inum, nthreads);
iifrom += astart;
iito += astart;
int foff;
if (NEWTON_PAIR) foff = tid * f_stride - minlocal;
else foff = -minlocal;
FORCE_T * _noalias const f = f_start + foff;
if (NEWTON_PAIR) memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
flt_t cutboth = cut_coulsq;
const int toffs = tid * ccache_stride;
flt_t * _noalias const tdelx = ccachex + toffs;
flt_t * _noalias const tdely = ccachey + toffs;
flt_t * _noalias const tdelz = ccachez + toffs;
flt_t * _noalias const trsq = ccachew + toffs;
int * _noalias const tj = ccachei + toffs;
int * _noalias const tjtype = ccachej + toffs;
for (int i = iifrom; i < iito; i += iip) {
// const int i = ilist[ii];
const int itype = x[i].w;
const int ptr_off = itype * ntypes;
const flt_t * _noalias const cutsqi = cutsq + ptr_off;
const LJ_T * _noalias const lji = lj + ptr_off;
const int * _noalias const jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
acc_t fxtmp,fytmp,fztmp,fwtmp;
acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5;
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
const flt_t qtmp = q[i];
fxtmp = fytmp = fztmp = (acc_t)0;
if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0;
if (NEWTON_PAIR == 0)
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
int ej = 0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj] & NEIGHMASK;
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
if (rsq < cut_coulsq) {
trsq[ej]=rsq;
tdelx[ej]=delx;
tdely[ej]=dely;
tdelz[ej]=delz;
tjtype[ej]=x[j].w;
tj[ej]=jlist[jj];
ej++;
}
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int jj = 0; jj < ej; jj++) {
flt_t forcecoul, forcelj, evdwl;
forcecoul = forcelj = evdwl = (flt_t)0.0;
const int j = tj[jj] & NEIGHMASK;
const int sbindex = tj[jj] >> SBBITS & 3;
const flt_t rsq = trsq[jj];
const flt_t r2inv = (flt_t)1.0 / rsq;
const flt_t r_inv = (flt_t)1.0 / sqrt(rsq);
forcecoul = qqrd2e * qtmp * q[j] * r_inv;
if (rsq > cut_coul_innersq) {
const flt_t ccr = cut_coulsq - rsq;
const flt_t switch1 = ccr * ccr * inv_denom_coul *
(cut_coulsq + (flt_t)2.0 * rsq - (flt_t)3.0 * cut_coul_innersq);
forcecoul *= switch1;
}
#ifdef INTEL_VMASK
if (rsq < cut_ljsq) {
#endif
const int jtype = tjtype[jj];
flt_t r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y);
if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w);
#ifdef INTEL_VMASK
if (rsq > cut_lj_innersq) {
#endif
const flt_t drsq = cut_ljsq - rsq;
const flt_t cut2 = (rsq - cut_lj_innersq) * drsq;
const flt_t switch1 = drsq * (drsq * drsq + (flt_t)3.0 * cut2) *
inv_denom_lj;
const flt_t switch2 = (flt_t)12.0 * rsq * cut2 * inv_denom_lj;
if (EFLAG) {
#ifndef INTEL_VMASK
if (rsq > cut_lj_innersq) {
#endif
forcelj = forcelj * switch1 + evdwl * switch2;
evdwl *= switch1;
#ifndef INTEL_VMASK
}
#endif
} else {
const flt_t philj = r6inv * (lji[jtype].z*r6inv -
lji[jtype].w);
#ifndef INTEL_VMASK
if (rsq > cut_lj_innersq)
#endif
forcelj = forcelj * switch1 + philj * switch2;
}
#ifdef INTEL_VMASK
}
#endif
#ifdef INTEL_VMASK
}
#else
if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; }
#endif
if (sbindex) {
const flt_t factor_coul = special_coul[sbindex];
forcecoul *= factor_coul;
const flt_t factor_lj = special_lj[sbindex];
forcelj *= factor_lj;
if (EFLAG) evdwl *= factor_lj;
}
const flt_t fpair = (forcecoul + forcelj) * r2inv;
const flt_t fpx = fpair * tdelx[jj];
fxtmp += fpx;
if (NEWTON_PAIR) f[j].x -= fpx;
const flt_t fpy = fpair * tdely[jj];
fytmp += fpy;
if (NEWTON_PAIR) f[j].y -= fpy;
const flt_t fpz = fpair * tdelz[jj];
fztmp += fpz;
if (NEWTON_PAIR) f[j].z -= fpz;
if (EFLAG) {
sevdwl += evdwl;
secoul += forcecoul;
if (eatom) {
fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * forcecoul;
if (NEWTON_PAIR)
f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * forcecoul;
}
}
if (NEWTON_PAIR == 0)
IP_PRE_ev_tally_nborv(vflag, tdelx[jj], tdely[jj], tdelz[jj],
fpx, fpy, fpz);
} // for jj
if (NEWTON_PAIR) {
f[i].x += fxtmp;
f[i].y += fytmp;
f[i].z += fztmp;
} else {
f[i].x = fxtmp;
f[i].y = fytmp;
f[i].z = fztmp;
}
IP_PRE_ev_tally_atomq(NEWTON_PAIR, EFLAG, vflag, f, fwtmp);
} // for ii
IP_PRE_fdotr_reduce_omp(NEWTON_PAIR, nall, minlocal, nthreads, f_start,
f_stride, x, offload, vflag, ov0, ov1, ov2, ov3,
ov4, ov5);
} // end of omp parallel region
IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag,
ov0, ov1, ov2, ov3, ov4, ov5);
if (EFLAG) {
if (NEWTON_PAIR == 0) {
oevdwl *= (acc_t)0.5;
oecoul *= (acc_t)0.5;
}
ev_global[0] = oevdwl;
ev_global[1] = oecoul;
}
if (vflag) {
if (NEWTON_PAIR == 0) {
ov0 *= (acc_t)0.5;
ov1 *= (acc_t)0.5;
ov2 *= (acc_t)0.5;
ov3 *= (acc_t)0.5;
ov4 *= (acc_t)0.5;
ov5 *= (acc_t)0.5;
}
ev_global[2] = ov0;
ev_global[3] = ov1;
ev_global[4] = ov2;
ev_global[5] = ov3;
ev_global[6] = ov4;
ev_global[7] = ov5;
}
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime() - *timer_compute;
#endif
} // end of offload region
if (offload)
fix->stop_watch(TIME_OFFLOAD_LATENCY);
else
fix->stop_watch(TIME_HOST_PAIR);
if (EFLAG || vflag)
fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag);
else
fix->add_result_array(f_start, 0, offload);
}
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmIntel::init_style()
{
PairLJCharmmCoulCharmm::init_style();
if (force->newton_pair == 0) {
neighbor->requests[neighbor->nrequest-1]->half = 0;
neighbor->requests[neighbor->nrequest-1]->full = 1;
}
neighbor->requests[neighbor->nrequest-1]->intel = 1;
int ifix = modify->find_fix("package_intel");
if (ifix < 0)
error->all(FLERR,
"The 'package intel' command is required for /intel styles");
fix = static_cast<FixIntel *>(modify->fix[ifix]);
fix->pair_init_check();
#ifdef _LMP_INTEL_OFFLOAD
_cop = fix->coprocessor_number();
#endif
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
pack_force_const(force_const_single, fix->get_mixed_buffers());
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
pack_force_const(force_const_double, fix->get_double_buffers());
else
pack_force_const(force_const_single, fix->get_single_buffers());
}
template <class flt_t, class acc_t>
void PairLJCharmmCoulCharmmIntel::pack_force_const(ForceConst<flt_t> &fc,
IntelBuffers<flt_t,acc_t> *buffers)
{
int off_ccache = 0;
#ifdef _LMP_INTEL_OFFLOAD
if (_cop >= 0) off_ccache = 1;
#endif
buffers->grow_ccache(off_ccache, comm->nthreads, 1);
_ccache_stride = buffers->ccache_stride();
int tp1 = atom->ntypes + 1;
fc.set_ntypes(tp1, memory, _cop);
buffers->set_ntypes(tp1);
flt_t **cutneighsq = buffers->get_cutneighsq();
// Repeat cutsq calculation because done after call to init_style
double cut, cutneigh;
if (cut_lj > cut_coul)
error->all(FLERR,
"Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic");
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i, j);
cutneigh = cut + neighbor->skin;
cutsq[i][j] = cutsq[j][i] = cut*cut;
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
}
}
}
cut_coul_innersq = cut_coul_inner * cut_coul_inner;
cut_lj_innersq = cut_lj_inner * cut_lj_inner;
cut_ljsq = cut_lj * cut_lj;
cut_coulsq = cut_coul * cut_coul;
cut_bothsq = MAX(cut_ljsq, cut_coulsq);
fc.cut_coulsq = cut_coulsq;
fc.cut_ljsq = cut_ljsq;
fc.cut_coul_innersq = cut_coul_innersq;
fc.cut_lj_innersq = cut_lj_innersq;
for (int i = 0; i < 4; i++) {
fc.special_lj[i] = force->special_lj[i];
fc.special_coul[i] = force->special_coul[i];
fc.special_coul[0] = 1.0;
fc.special_lj[0] = 1.0;
}
for (int i = 0; i < tp1; i++) {
for (int j = 0; j < tp1; j++) {
fc.lj[i][j].x = lj1[i][j];
fc.lj[i][j].y = lj2[i][j];
fc.lj[i][j].z = lj3[i][j];
fc.lj[i][j].w = lj4[i][j];
fc.cutsq[i][j] = cutsq[i][j];
}
}
#ifdef _LMP_INTEL_OFFLOAD
if (_cop < 0) return;
flt_t * special_lj = fc.special_lj;
flt_t * special_coul = fc.special_coul;
flt_t * cutsq = fc.cutsq[0];
LJ_T * lj = fc.lj[0];
flt_t * ocutneighsq = cutneighsq[0];
int tp1sq = tp1 * tp1;
#pragma offload_transfer target(mic:_cop) \
in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \
in(cutsq,lj: length(tp1sq) alloc_if(0) free_if(0)) \
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
#endif
}
/* ---------------------------------------------------------------------- */
template <class flt_t>
void PairLJCharmmCoulCharmmIntel::ForceConst<flt_t>::set_ntypes(
const int ntypes, Memory *memory, const int cop) {
if (ntypes != _ntypes) {
if (_ntypes > 0) {
#ifdef _LMP_INTEL_OFFLOAD
flt_t * ospecial_lj = special_lj;
flt_t * ospecial_coul = special_coul;
flt_t * ocutsq = cutsq[0];
typename IntelBuffers<flt_t,flt_t>::vec4_t * olj = lj[0];
if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL &&
ospecial_coul != NULL && cop >= 0) {
#pragma offload_transfer target(mic:cop) \
nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \
nocopy(ocutsq, olj: alloc_if(0) free_if(1))
}
#endif
_memory->destroy(cutsq);
_memory->destroy(lj);
}
if (ntypes > 0) {
_cop = cop;
memory->create(cutsq,ntypes,ntypes,"fc.cutsq");
memory->create(lj,ntypes,ntypes,"fc.lj");
#ifdef _LMP_INTEL_OFFLOAD
flt_t * ospecial_lj = special_lj;
flt_t * ospecial_coul = special_coul;
flt_t * ocutsq = cutsq[0];
typename IntelBuffers<flt_t,flt_t>::vec4_t * olj = lj[0];
int tp1sq = ntypes*ntypes;
if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL &&
ospecial_coul != NULL && cop >= 0) {
#pragma offload_transfer target(mic:cop) \
nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \
nocopy(ospecial_coul: length(4) alloc_if(1) free_if(0)) \
nocopy(ocutsq,olj: length(tp1sq) alloc_if(1) free_if(0))
}
#endif
}
}
_ntypes=ntypes;
_memory=memory;
}

View File

@ -0,0 +1,100 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/charmm/coul/charmm/intel,PairLJCharmmCoulCharmmIntel)
#else
#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_INTEL_H
#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_INTEL_H
#include "pair_lj_charmm_coul_charmm.h"
#include "fix_intel.h"
namespace LAMMPS_NS {
class PairLJCharmmCoulCharmmIntel : public PairLJCharmmCoulCharmm {
public:
PairLJCharmmCoulCharmmIntel(class LAMMPS *);
virtual ~PairLJCharmmCoulCharmmIntel();
virtual void compute(int, int);
void init_style();
typedef struct { float x,y,z; int w; } sng4_t;
private:
FixIntel *fix;
int _cop, _ccache_stride;
template <class flt_t> class ForceConst;
template <class flt_t, class acc_t>
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc);
template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
void eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> * buffers,
const ForceConst<flt_t> &fc, const int astart, const int aend);
template <class flt_t, class acc_t>
void pack_force_const(ForceConst<flt_t> &fc,
IntelBuffers<flt_t, acc_t> *buffers);
// ----------------------------------------------------------------------
template <class flt_t>
class ForceConst {
public:
_alignvar(flt_t special_coul[4],64);
_alignvar(flt_t special_lj[4],64);
flt_t **cutsq;
flt_t cut_coulsq, cut_ljsq;
flt_t cut_coul_innersq, cut_lj_innersq;
typename IntelBuffers<flt_t,flt_t>::vec4_t **lj;
ForceConst() : _ntypes(0) {}
~ForceConst() { set_ntypes(0,NULL,_cop); }
void set_ntypes(const int ntypes, Memory *memory, const int cop);
private:
int _ntypes, _cop;
Memory *_memory;
};
ForceConst<float> force_const_single;
ForceConst<double> force_const_double;
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: The 'package intel' command is required for /intel styles
Self-explanatory.
E: Intel varient of lj/charmm/coul/charmm expects lj cutoff<=coulombic
The intel accelerated version of the CHARMM style requires that the
Lennard-Jones cutoff is not greater than the coulombic cutoff.
*/

View File

@ -0,0 +1,42 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Markus Hohnerbach (RWTH)
------------------------------------------------------------------------- */
#include "pair_rebo_intel.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
PairREBOIntel::PairREBOIntel(LAMMPS *lmp) : PairAIREBOIntel(lmp) {}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
void PairREBOIntel::settings(int narg, char **arg)
{
if (narg != 0) error->all(FLERR,"Illegal pair_style command");
cutlj = 0.0;
ljflag = torflag = 0;
//
// this one parameter for C-C interactions is different in REBO vs AIREBO
// see Favata, Micheletti, Ryu, Pugno, Comp Phys Comm (2016)
PCCf_2_0 = 0.0;
}

View File

@ -0,0 +1,40 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Markus Hohnerbach (RWTH)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(rebo/intel,PairREBOIntel)
#else
#ifndef LMP_PAIR_REBO_INTEL_H
#define LMP_PAIR_REBO_INTEL_H
#include "pair_airebo_intel.h"
namespace LAMMPS_NS {
class PairREBOIntel : public PairAIREBOIntel {
public:
PairREBOIntel(class LAMMPS *);
virtual void settings(int, char **);
};
}
#endif
#endif

View File

@ -345,16 +345,17 @@ void PairSWIntel::eval(const int offload, const int vflag,
if (jj < jnumhalf) ejnumhalf++;
}
}
int ejnum_pad = ejnum;
while ( (ejnum_pad % pad_width) != 0) {
tdelx[ejnum_pad] = (flt_t)0.0;
tdely[ejnum_pad] = (flt_t)0.0;
tdelz[ejnum_pad] = (flt_t)0.0;
trsq[ejnum_pad] = p2[3].cutsq + (flt_t)1.0;
tj[ejnum_pad] = nall;
if (!ONETYPE) tjtype[ejnum_pad] = 0;
ejnum_pad++;
int ejrem = ejnum & (pad_width - 1);
if (ejrem) ejrem = pad_width - ejrem;
const int ejnum_pad = ejnum + ejrem;
for (int jj = ejnum; jj < ejnum_pad; jj++) {
tdelx[jj] = (flt_t)0.0;
tdely[jj] = (flt_t)0.0;
tdelz[jj] = (flt_t)0.0;
trsq[jj] = p2[3].cutsq + (flt_t)1.0;
tj[jj] = nall;
if (!ONETYPE) tjtype[jj] = 0;
}
#if defined(LMP_SIMD_COMPILER)