USER-INTEL add-ons from Mike

This commit is contained in:
Steve Plimpton 2017-08-15 17:12:07 -06:00
parent 0b3f1b8a15
commit 1d4d2155a2
35 changed files with 9675 additions and 49 deletions

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 20 KiB

View File

@ -29,8 +29,10 @@ Bond Styles: fene, harmonic :l
Dihedral Styles: charmm, harmonic, opls :l Dihedral Styles: charmm, harmonic, opls :l
Fixes: nve, npt, nvt, nvt/sllod :l Fixes: nve, npt, nvt, nvt/sllod :l
Improper Styles: cvff, harmonic :l Improper Styles: cvff, harmonic :l
Pair Styles: buck/coul/cut, buck/coul/long, buck, eam, gayberne, Pair Styles: airebo, airebo/morse, buck/coul/cut, buck/coul/long,
charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, sw, tersoff :l buck, eam, eam/alloy, eam/fs, gayberne, charmm/coul/charmm,
charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, rebo,
sw, tersoff :l
K-Space Styles: pppm, pppm/disp :l K-Space Styles: pppm, pppm/disp :l
:ule :ule

View File

@ -7,10 +7,13 @@
:line :line
pair_style airebo command :h3 pair_style airebo command :h3
pair_style airebo/intel command :h3
pair_style airebo/omp command :h3 pair_style airebo/omp command :h3
pair_style airebo/morse command :h3 pair_style airebo/morse command :h3
pair_style airebo/morse/intel command :h3
pair_style airebo/morse/omp command :h3 pair_style airebo/morse/omp command :h3
pair_style rebo command :h3 pair_style rebo command :h3
pair_style rebo/intel command :h3
pair_style rebo/omp command :h3 pair_style rebo/omp command :h3
[Syntax:] [Syntax:]

View File

@ -7,6 +7,7 @@
:line :line
pair_style lj/charmm/coul/charmm command :h3 pair_style lj/charmm/coul/charmm command :h3
pair_style lj/charmm/coul/charmm/intel command :h3
pair_style lj/charmm/coul/charmm/omp command :h3 pair_style lj/charmm/coul/charmm/omp command :h3
pair_style lj/charmm/coul/charmm/implicit command :h3 pair_style lj/charmm/coul/charmm/implicit command :h3
pair_style lj/charmm/coul/charmm/implicit/omp command :h3 pair_style lj/charmm/coul/charmm/implicit/omp command :h3

View File

@ -14,6 +14,7 @@ pair_style eam/omp command :h3
pair_style eam/opt command :h3 pair_style eam/opt command :h3
pair_style eam/alloy command :h3 pair_style eam/alloy command :h3
pair_style eam/alloy/gpu command :h3 pair_style eam/alloy/gpu command :h3
pair_style eam/alloy/intel command :h3
pair_style eam/alloy/kk command :h3 pair_style eam/alloy/kk command :h3
pair_style eam/alloy/omp command :h3 pair_style eam/alloy/omp command :h3
pair_style eam/alloy/opt command :h3 pair_style eam/alloy/opt command :h3
@ -21,6 +22,7 @@ pair_style eam/cd command :h3
pair_style eam/cd/omp command :h3 pair_style eam/cd/omp command :h3
pair_style eam/fs command :h3 pair_style eam/fs command :h3
pair_style eam/fs/gpu command :h3 pair_style eam/fs/gpu command :h3
pair_style eam/fs/intel command :h3
pair_style eam/fs/kk command :h3 pair_style eam/fs/kk command :h3
pair_style eam/fs/omp command :h3 pair_style eam/fs/omp command :h3
pair_style eam/fs/opt command :h3 pair_style eam/fs/opt command :h3

View File

@ -14,7 +14,7 @@ SHFLAGS = -fPIC
DEPFLAGS = -M DEPFLAGS = -M
LINK = mpiicpc LINK = mpiicpc
LINKFLAGS = -g -qopenmp $(OPTFLAGS) LINKFLAGS = -qopenmp $(OPTFLAGS)
LIB = -ltbbmalloc LIB = -ltbbmalloc
SIZE = size SIZE = size

View File

@ -7,7 +7,7 @@ SHELL = /bin/sh
# specify flags and libraries needed for your compiler # specify flags and libraries needed for your compiler
CC = mpicxx -cxx=icc CC = mpicxx -cxx=icc
OPTFLAGS = -xAVX -O2 -fp-model fast=2 -no-prec-div -qoverride-limits OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \ CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
-fno-alias -ansi-alias -restrict $(OPTFLAGS) -fno-alias -ansi-alias -restrict $(OPTFLAGS)
SHFLAGS = -fPIC SHFLAGS = -fPIC

View File

@ -8,7 +8,7 @@ SHELL = /bin/sh
export OMPI_CXX = icc export OMPI_CXX = icc
CC = mpicxx CC = mpicxx
OPTFLAGS = -xAVX -O2 -fp-model fast=2 -no-prec-div -qoverride-limits OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits
CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \ CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \
-fno-alias -ansi-alias -restrict $(OPTFLAGS) -fno-alias -ansi-alias -restrict $(OPTFLAGS)
SHFLAGS = -fPIC SHFLAGS = -fPIC

View File

@ -46,6 +46,7 @@ action npair_intel.h
action npair_intel.cpp action npair_intel.cpp
action intel_simd.h pair_sw_intel.cpp action intel_simd.h pair_sw_intel.cpp
action intel_intrinsics.h pair_tersoff_intel.cpp action intel_intrinsics.h pair_tersoff_intel.cpp
action intel_intrinsics_airebo.h pair_airebo_intel.cpp
action verlet_lrt_intel.h pppm.cpp action verlet_lrt_intel.h pppm.cpp
action verlet_lrt_intel.cpp pppm.cpp action verlet_lrt_intel.cpp pppm.cpp

View File

@ -4,9 +4,9 @@
-------------------------------- --------------------------------
W. Michael Brown (Intel) michael.w.brown at intel.com W. Michael Brown (Intel) michael.w.brown at intel.com
Markus Hohnerbach (RWTH Aachen University)
William McDoniel (RWTH Aachen University) William McDoniel (RWTH Aachen University)
Rodrigo Canales (RWTH Aachen University) Rodrigo Canales (RWTH Aachen University)
Markus H<>hnerbach (RWTH Aachen University)
Stan Moore (Sandia) Stan Moore (Sandia)
Ahmed E. Ismail (RWTH Aachen University) Ahmed E. Ismail (RWTH Aachen University)
Paolo Bientinesi (RWTH Aachen University) Paolo Bientinesi (RWTH Aachen University)

View File

@ -8,6 +8,7 @@
# in.intel.sw - Silicon benchmark with Stillinger-Weber # in.intel.sw - Silicon benchmark with Stillinger-Weber
# in.intel.tersoff - Silicon benchmark with Tersoff # in.intel.tersoff - Silicon benchmark with Tersoff
# in.intel.water - Coarse-grain water benchmark using Stillinger-Weber # in.intel.water - Coarse-grain water benchmark using Stillinger-Weber
# in.intel.airebo - Polyethelene benchmark with AIREBO
# #
############################################################################# #############################################################################
@ -24,6 +25,7 @@
# in.intel.sw - 132.4 161.9 # in.intel.sw - 132.4 161.9
# in.intel.tersoff - 83.3 101.1 # in.intel.tersoff - 83.3 101.1
# in.intel.water - 53.4 90.3 # in.intel.water - 53.4 90.3
# in.intel.airebo - 7.3 11.8
# #
############################################################################# #############################################################################

View File

@ -0,0 +1,47 @@
# AIREBO polyethelene benchmark
variable N index on # Newton Setting
variable w index 10 # Warmup Timesteps
variable t index 550 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
variable n index 0 # Use NUMA Mapping for Multi-Node
variable p index 0 # Use Power Measurement
variable x index 4
variable y index 2
variable z index 2
variable xx equal 17*$x
variable yy equal 16*$y
variable zz equal 2*$z
variable rr equal floor($t*$m)
variable root getenv LMP_ROOT
newton $N
if "$n > 0" then "processors * * * grid numa"
variable root getenv LMP_ROOT
units metal
atom_style atomic
read_data ${root}/examples/airebo/data.airebo
replicate ${xx} ${yy} ${zz}
neighbor 0.5 bin
neigh_modify delay 5 every 1
pair_style airebo 3.0 1 1
pair_coeff * * ${root}/potentials/CH.airebo C H
velocity all create 300.0 761341
fix 1 all nve
timestep 0.0005
thermo 50
if "$p > 0" then "run_style verlet/power"
if "$w > 0" then "run $w"
run ${rr}

View File

@ -5,7 +5,6 @@ variable w index 10 # Warmup Timesteps
variable t index 3100 # Main Run Timesteps variable t index 3100 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier variable m index 1 # Main Run Timestep Multiplier
variable n index 0 # Use NUMA Mapping for Multi-Node variable n index 0 # Use NUMA Mapping for Multi-Node
variable b index 3 # Neighbor binsize
variable p index 0 # Use Power Measurement variable p index 0 # Use Power Measurement
variable x index 4 variable x index 4

View File

@ -5,7 +5,6 @@ variable w index 10 # Warmup Timesteps
variable t index 520 # Main Run Timesteps variable t index 520 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier variable m index 1 # Main Run Timestep Multiplier
variable n index 0 # Use NUMA Mapping for Multi-Node variable n index 0 # Use NUMA Mapping for Multi-Node
variable b index 3 # Neighbor binsize
variable p index 0 # Use Power Measurement variable p index 0 # Use Power Measurement
variable c index 0 # 1 to use collectives for PPPM variable c index 0 # 1 to use collectives for PPPM
variable d index 1 # 1 to use 'diff ad' for PPPM variable d index 1 # 1 to use 'diff ad' for PPPM

View File

@ -30,6 +30,9 @@ IntelBuffers<flt_t, acc_t>::IntelBuffers(class LAMMPS *lmp_in) :
_off_map_listlocal = 0; _off_map_listlocal = 0;
_ccachex = 0; _ccachex = 0;
_ncache_alloc = 0; _ncache_alloc = 0;
_ncachetag = 0;
_cutneighsq = 0;
_cutneighghostsq = 0;
#ifdef _LMP_INTEL_OFFLOAD #ifdef _LMP_INTEL_OFFLOAD
_separate_buffers = 0; _separate_buffers = 0;
_off_f = 0; _off_f = 0;
@ -447,12 +450,17 @@ void IntelBuffers<flt_t, acc_t>::free_ncache()
flt_t *ncachez = _ncachez; flt_t *ncachez = _ncachez;
int *ncachej = _ncachej; int *ncachej = _ncachej;
int *ncachejtype = _ncachejtype; int *ncachejtype = _ncachejtype;
int *ncachetag = _ncachetag;
#ifdef _LMP_INTEL_OFFLOAD #ifdef _LMP_INTEL_OFFLOAD
if (_off_ncache) { if (_off_ncache) {
#pragma offload_transfer target(mic:_cop) \ #pragma offload_transfer target(mic:_cop) \
nocopy(ncachex,ncachey,ncachez,ncachej:alloc_if(0) free_if(1)) \ nocopy(ncachex,ncachey,ncachez,ncachej:alloc_if(0) free_if(1)) \
nocopy(ncachejtype:alloc_if(0) free_if(1)) nocopy(ncachejtype:alloc_if(0) free_if(1))
if (ncachetag) {
#pragma offload_transfer target(mic:_cop) \
nocopy(ncachetag:alloc_if(0) free_if(1))
}
} }
_off_ncache = 0; _off_ncache = 0;
#endif #endif
@ -462,8 +470,10 @@ void IntelBuffers<flt_t, acc_t>::free_ncache()
lmp->memory->destroy(ncachez); lmp->memory->destroy(ncachez);
lmp->memory->destroy(ncachej); lmp->memory->destroy(ncachej);
lmp->memory->destroy(ncachejtype); lmp->memory->destroy(ncachejtype);
if (ncachetag)
lmp->memory->destroy(ncachetag);
_ncache_alloc = 0; _ncache_alloc = 0;
_ncachetag = 0;
} }
} }
@ -480,7 +490,7 @@ void IntelBuffers<flt_t, acc_t>::grow_ncache(const int off_flag,
const int vsize = _ncache_stride * nt; const int vsize = _ncache_stride * nt;
if (_ncache_alloc) { if (_ncache_alloc) {
if (vsize > _ncache_alloc) if (vsize > _ncache_alloc || (need_tag() && _ncachetag == 0))
free_ncache(); free_ncache();
#ifdef _LMP_INTEL_OFFLOAD #ifdef _LMP_INTEL_OFFLOAD
else if (off_flag && _off_ncache == 0) else if (off_flag && _off_ncache == 0)
@ -495,6 +505,8 @@ void IntelBuffers<flt_t, acc_t>::grow_ncache(const int off_flag,
lmp->memory->create(_ncachez, vsize, "_ncachez"); lmp->memory->create(_ncachez, vsize, "_ncachez");
lmp->memory->create(_ncachej, vsize, "_ncachej"); lmp->memory->create(_ncachej, vsize, "_ncachej");
lmp->memory->create(_ncachejtype, vsize, "_ncachejtype"); lmp->memory->create(_ncachejtype, vsize, "_ncachejtype");
if (need_tag())
lmp->memory->create(_ncachetag, vsize, "_ncachetag");
_ncache_alloc = vsize; _ncache_alloc = vsize;
@ -513,6 +525,14 @@ void IntelBuffers<flt_t, acc_t>::grow_ncache(const int off_flag,
nocopy(ncachez,ncachej:length(vsize) alloc_if(1) free_if(0)) \ nocopy(ncachez,ncachej:length(vsize) alloc_if(1) free_if(0)) \
nocopy(ncachejtype:length(vsize) alloc_if(1) free_if(0)) nocopy(ncachejtype:length(vsize) alloc_if(1) free_if(0))
} }
int tsize = vsize;
if (!need_tag()) {
tsize = 16;
lmp->memory->create(_ncachetag, tsize, "_ncachetag");
}
int *ncachetag = _ncachetag;
#pragma offload_transfer target(mic:_cop) \
nocopy(ncachetag:length(tsize) alloc_if(1) free_if(0))
_off_ncache = 1; _off_ncache = 1;
} }
#endif #endif
@ -548,7 +568,8 @@ void IntelBuffers<flt_t, acc_t>::fdotr_reduce(const int nall,
/* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */
template <class flt_t, class acc_t> template <class flt_t, class acc_t>
void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes) void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes,
const int use_ghost_cut)
{ {
if (ntypes != _ntypes) { if (ntypes != _ntypes) {
if (_ntypes > 0) { if (_ntypes > 0) {
@ -558,16 +579,34 @@ void IntelBuffers<flt_t, acc_t>::set_ntypes(const int ntypes)
#pragma offload_transfer target(mic:_cop) \ #pragma offload_transfer target(mic:_cop) \
nocopy(cutneighsqo:alloc_if(0) free_if(1)) nocopy(cutneighsqo:alloc_if(0) free_if(1))
} }
flt_t * cutneighghostsqo;
if (_cutneighghostsq && _off_threads > 0 && cutneighghostsqo != 0) {
cutneighghostsqo = _cutneighghostsq[0];
#pragma offload_transfer target(mic:_cop) \
nocopy(cutneighghostsqo:alloc_if(0) free_if(1))
}
#endif #endif
lmp->memory->destroy(_cutneighsq); lmp->memory->destroy(_cutneighsq);
if (_cutneighghostsq != 0) lmp->memory->destroy(_cutneighghostsq);
} }
if (ntypes > 0) { if (ntypes > 0) {
lmp->memory->create(_cutneighsq, ntypes, ntypes, "_cutneighsq"); lmp->memory->create(_cutneighsq, ntypes, ntypes, "_cutneighsq");
if (use_ghost_cut)
lmp->memory->create(_cutneighghostsq, ntypes, ntypes,
"_cutneighghostsq");
#ifdef _LMP_INTEL_OFFLOAD #ifdef _LMP_INTEL_OFFLOAD
flt_t * cutneighsqo = _cutneighsq[0]; flt_t * cutneighsqo = _cutneighsq[0];
const int ntypes2 = ntypes * ntypes;
if (_off_threads > 0 && cutneighsqo != NULL) { if (_off_threads > 0 && cutneighsqo != NULL) {
#pragma offload_transfer target(mic:_cop) \ #pragma offload_transfer target(mic:_cop) \
nocopy(cutneighsqo:length(ntypes * ntypes) alloc_if(1) free_if(0)) nocopy(cutneighsqo:length(ntypes2) alloc_if(1) free_if(0))
}
if (use_ghost_cut) {
flt_t * cutneighghostsqo = _cutneighghostsq[0];
if (_off_threads > 0 && cutneighghostsqo != NULL) {
#pragma offload_transfer target(mic:_cop) \
nocopy(cutneighghostsqo:length(ntypes2) alloc_if(1) free_if(0))
}
} }
#endif #endif
} }

View File

@ -109,12 +109,14 @@ class IntelBuffers {
void free_ncache(); void free_ncache();
void grow_ncache(const int off_flag, const int nthreads); void grow_ncache(const int off_flag, const int nthreads);
void grow_ncachetag(const int off_flag, const int nthreads);
inline int ncache_stride() { return _ncache_stride; } inline int ncache_stride() { return _ncache_stride; }
inline flt_t * get_ncachex() { return _ncachex; } inline flt_t * get_ncachex() { return _ncachex; }
inline flt_t * get_ncachey() { return _ncachey; } inline flt_t * get_ncachey() { return _ncachey; }
inline flt_t * get_ncachez() { return _ncachez; } inline flt_t * get_ncachez() { return _ncachez; }
inline int * get_ncachej() { return _ncachej; } inline int * get_ncachej() { return _ncachej; }
inline int * get_ncachejtype() { return _ncachejtype; } inline int * get_ncachejtype() { return _ncachejtype; }
inline int * get_ncachetag() { return _ncachetag; }
inline int get_max_nbors() { inline int get_max_nbors() {
int mn = lmp->neighbor->oneatom * sizeof(int) / int mn = lmp->neighbor->oneatom * sizeof(int) /
@ -131,7 +133,7 @@ class IntelBuffers {
_grow_nbor_list(list, nlocal, nthreads, offload_end, pack_width); _grow_nbor_list(list, nlocal, nthreads, offload_end, pack_width);
} }
void set_ntypes(const int ntypes); void set_ntypes(const int ntypes, const int use_ghost_cut = 0);
inline int * firstneigh(const NeighList *list) { return _list_alloc; } inline int * firstneigh(const NeighList *list) { return _list_alloc; }
inline int * cnumneigh(const NeighList *list) { return _cnumneigh; } inline int * cnumneigh(const NeighList *list) { return _cnumneigh; }
@ -162,6 +164,7 @@ class IntelBuffers {
inline void zero_ev() inline void zero_ev()
{ for (int i = 0; i < 8; i++) _ev_global[i] = _ev_global_host[i] = 0.0; } { for (int i = 0; i < 8; i++) _ev_global[i] = _ev_global_host[i] = 0.0; }
inline flt_t ** get_cutneighsq() { return _cutneighsq; } inline flt_t ** get_cutneighsq() { return _cutneighsq; }
inline flt_t ** get_cutneighghostsq() { return _cutneighghostsq; }
inline int get_off_threads() { return _off_threads; } inline int get_off_threads() { return _off_threads; }
#ifdef _LMP_INTEL_OFFLOAD #ifdef _LMP_INTEL_OFFLOAD
inline void set_off_params(const int n, const int cop, inline void set_off_params(const int n, const int cop,
@ -274,13 +277,10 @@ class IntelBuffers {
used_ghost * sizeof(flt_t)); used_ghost * sizeof(flt_t));
} }
} }
#endif
inline int need_tag() { return _need_tag; } inline int need_tag() { return _need_tag; }
inline void need_tag(const int nt) { _need_tag = nt; } inline void need_tag(const int nt) { _need_tag = nt; }
#else
inline int need_tag() { return 0; }
inline void need_tag(const int nt) { }
#endif
double memory_usage(const int nthreads); double memory_usage(const int nthreads);
@ -298,7 +298,7 @@ class IntelBuffers {
int _list_alloc_atoms; int _list_alloc_atoms;
int *_list_alloc, *_cnumneigh, *_atombin, *_binpacked; int *_list_alloc, *_cnumneigh, *_atombin, *_binpacked;
flt_t **_cutneighsq; flt_t **_cutneighsq, **_cutneighghostsq;
int _ntypes; int _ntypes;
int _ccache_stride; int _ccache_stride;
@ -307,7 +307,10 @@ class IntelBuffers {
int _ncache_stride, _ncache_alloc; int _ncache_stride, _ncache_alloc;
flt_t *_ncachex, *_ncachey, *_ncachez; flt_t *_ncachex, *_ncachey, *_ncachez;
int *_ncachej, *_ncachejtype; int *_ncachej, *_ncachejtype, *_ncachetag;
int _need_tag, _host_nmax;
#ifdef LMP_USE_AVXCD #ifdef LMP_USE_AVXCD
int _ccache_stride3; int _ccache_stride3;
acc_t * _ccachef; acc_t * _ccachef;
@ -324,7 +327,6 @@ class IntelBuffers {
int *_off_map_special, *_off_map_nspecial, *_off_map_tag; int *_off_map_special, *_off_map_nspecial, *_off_map_tag;
int *_off_map_numneigh; int *_off_map_numneigh;
bool _off_list_alloc; bool _off_list_alloc;
int _need_tag, _host_nmax;
#endif #endif
int _buf_size, _buf_local_size; int _buf_size, _buf_local_size;

File diff suppressed because it is too large Load Diff

View File

@ -211,6 +211,8 @@ void NBinIntel::bin_atoms(IntelBuffers<flt_t,acc_t> * buffers) {
for (i = nall-1; i >= nlocal; i--) { for (i = nall-1; i >= nlocal; i--) {
if (mask[i] & bitmask) { if (mask[i] & bitmask) {
ibin = coord2bin(atom->x[i]); ibin = coord2bin(atom->x[i]);
// Only necessary to store when neighboring ghost
atombin[i] = ibin;
bins[i] = binhead[ibin]; bins[i] = binhead[ibin];
binhead[ibin] = i; binhead[ibin] = i;
} }
@ -222,14 +224,10 @@ void NBinIntel::bin_atoms(IntelBuffers<flt_t,acc_t> * buffers) {
binhead[ibin] = i; binhead[ibin] = i;
} }
} else { } else {
for (i = nall-1; i >= nlocal; i--) { for (i = nall-1; i >= 0; i--) {
ibin = coord2bin(atom->x[i]); ibin = coord2bin(atom->x[i]);
bins[i] = binhead[ibin]; // Only necessary to store for ghost when neighboring ghost
binhead[ibin] = i; atombin[i] = ibin;
}
for (i = nlocal-1; i >= 0; i--) {
ibin = coord2bin(atom->x[i]);
atombin[i]=ibin;
bins[i] = binhead[ibin]; bins[i] = binhead[ibin];
binhead[ibin] = i; binhead[ibin] = i;
} }

View File

@ -0,0 +1,593 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include "npair_full_bin_ghost_intel.h"
#include "neighbor.h"
#include "nstencil.h"
#include "neigh_list.h"
#include "atom.h"
#include "atom_vec.h"
#include "molecule.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
NPairFullBinGhostIntel::NPairFullBinGhostIntel(LAMMPS *lmp) : NPairIntel(lmp) {}
/* ----------------------------------------------------------------------
binned neighbor list construction for all neighbors
include neighbors of ghost atoms, but no "special neighbors" for ghosts
every neighbor pair appears in list of both atoms i and j
------------------------------------------------------------------------- */
void NPairFullBinGhostIntel::build(NeighList *list)
{
#ifdef _LMP_INTEL_OFFLOAD
if (_fix->offload_noghost())
error->all(FLERR,
"The 'ghost no' option cannot be used with this USER-INTEL pair style.");
#endif
if (nstencil > INTEL_MAX_STENCIL_CHECK)
error->all(FLERR, "Too many neighbor bins for USER-INTEL package.");
#ifdef _LMP_INTEL_OFFLOAD
if (exclude)
error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
#endif
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
fbi(list, _fix->get_mixed_buffers());
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
fbi(list, _fix->get_double_buffers());
else
fbi(list, _fix->get_single_buffers());
_fix->stop_watch(TIME_HOST_NEIGHBOR);
}
/* ---------------------------------------------------------------------- */
template<class flt_t, class acc_t>
void NPairFullBinGhostIntel::fbi(NeighList * list,
IntelBuffers<flt_t,acc_t> * buffers)
{
const int nlocal = atom->nlocal;
const int nall = atom->nlocal + atom->nghost;
list->inum = atom->nlocal;
list->gnum = atom->nghost;
int host_start = _fix->host_start_neighbor();
const int off_end = _fix->offload_end_neighbor();
#ifdef _LMP_INTEL_OFFLOAD
if (off_end) grow_stencil();
if (_fix->full_host_list()) host_start = 0;
int offload_noghost = _fix->offload_noghost();
#endif
// only uses offload_end_neighbor to check whether we are doing offloading
// at all, no need to correct this later
buffers->grow_list(list, nall, comm->nthreads, off_end,
_fix->nbor_pack_width());
int need_ic = 0;
if (atom->molecular)
dminimum_image_check(need_ic, neighbor->cutneighmax, neighbor->cutneighmax,
neighbor->cutneighmax);
if (need_ic) {
fbi<flt_t,acc_t,1>(1, list, buffers, 0, off_end);
fbi<flt_t,acc_t,1>(0, list, buffers, host_start, nlocal);
} else {
fbi<flt_t,acc_t,0>(1, list, buffers, 0, off_end);
fbi<flt_t,acc_t,0>(0, list, buffers, host_start, nlocal);
}
}
/* ---------------------------------------------------------------------- */
template<class flt_t, class acc_t, int need_ic>
void NPairFullBinGhostIntel::fbi(const int offload, NeighList * list,
IntelBuffers<flt_t,acc_t> * buffers,
const int pstart, const int pend) {
if (pend-pstart == 0) return;
const int nall = atom->nlocal + atom->nghost;
int pad = 1;
int nall_t = nall;
const int aend = nall;
const int pack_width = _fix->nbor_pack_width();
const ATOM_T * _noalias const x = buffers->get_x();
int * _noalias const firstneigh = buffers->firstneigh(list);
const int e_nall = nall_t;
const int molecular = atom->molecular;
int *ns = NULL;
tagint *s = NULL;
int tag_size = 0, special_size;
if (buffers->need_tag()) tag_size = e_nall;
if (molecular) {
s = atom->special[0];
ns = atom->nspecial[0];
special_size = aend;
} else {
s = &buffers->_special_holder;
ns = &buffers->_nspecial_holder;
special_size = 0;
}
const tagint * _noalias const special = s;
const int * _noalias const nspecial = ns;
const int maxspecial = atom->maxspecial;
const tagint * _noalias const tag = atom->tag;
int * _noalias const ilist = list->ilist;
int * _noalias numneigh = list->numneigh;
int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int nstencil = this->nstencil;
const int * _noalias const stencil = this->stencil;
const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
const flt_t * _noalias const cutneighghostsq =
buffers->get_cutneighghostsq()[0];
const int ntypes = atom->ntypes + 1;
const int nlocal = atom->nlocal;
#ifndef _LMP_INTEL_OFFLOAD
int * const mask = atom->mask;
tagint * const molecule = atom->molecule;
#endif
int *molindex = atom->molindex;
int *molatom = atom->molatom;
Molecule **onemols = atom->avec->onemols;
int moltemplate;
if (molecular == 2) moltemplate = 1;
else moltemplate = 0;
if (moltemplate)
error->all(FLERR,
"Can't use moltemplate with npair style full/bin/ghost/intel.");
int tnum;
int *overflow;
double *timer_compute;
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
timer_compute = _fix->off_watch_neighbor();
tnum = buffers->get_off_threads();
overflow = _fix->get_off_overflow_flag();
_fix->stop_watch(TIME_HOST_NEIGHBOR);
_fix->start_watch(TIME_OFFLOAD_LATENCY);
} else
#endif
{
tnum = comm->nthreads;
overflow = _fix->get_overflow_flag();
}
const int nthreads = tnum;
const int maxnbors = buffers->get_max_nbors();
int * _noalias const atombin = buffers->get_atombin();
const int * _noalias const binpacked = buffers->get_binpacked();
const int xperiodic = domain->xperiodic;
const int yperiodic = domain->yperiodic;
const int zperiodic = domain->zperiodic;
const flt_t xprd_half = domain->xprd_half;
const flt_t yprd_half = domain->yprd_half;
const flt_t zprd_half = domain->zprd_half;
flt_t * _noalias const ncachex = buffers->get_ncachex();
flt_t * _noalias const ncachey = buffers->get_ncachey();
flt_t * _noalias const ncachez = buffers->get_ncachez();
int * _noalias const ncachej = buffers->get_ncachej();
int * _noalias const ncachejtype = buffers->get_ncachejtype();
int * _noalias const ncachetag = buffers->get_ncachetag();
const int ncache_stride = buffers->ncache_stride();
const int mbinx = this->mbinx;
const int mbiny = this->mbiny;
const int mbinz = this->mbinz;
const int * const stencilxyz = &this->stencilxyz[0][0];
#ifdef _LMP_INTEL_OFFLOAD
const int * _noalias const binhead = this->binhead;
const int * _noalias const bins = this->bins;
const int cop = _fix->coprocessor_number();
const int separate_buffers = _fix->separate_buffers();
#pragma offload target(mic:cop) if(offload) \
in(x:length(e_nall+1) alloc_if(0) free_if(0)) \
in(tag:length(tag_size) alloc_if(0) free_if(0)) \
in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \
in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \
in(bins,binpacked:length(nall) alloc_if(0) free_if(0)) \
in(binhead:length(mbins+1) alloc_if(0) free_if(0)) \
in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
in(cutneighghostsq:length(0) alloc_if(0) free_if(0)) \
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
in(numneigh:length(0) alloc_if(0) free_if(0)) \
in(ilist:length(0) alloc_if(0) free_if(0)) \
in(atombin:length(aend) alloc_if(0) free_if(0)) \
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
in(ncachex,ncachey,ncachez,ncachej:length(0) alloc_if(0) free_if(0)) \
in(ncachejtype,ncachetag:length(0) alloc_if(0) free_if(0)) \
in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \
in(separate_buffers,aend,nlocal,molecular,ntypes,mbinx,mbiny) \
in(mbinz,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \
in(stencilxyz:length(3*nstencil)) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(tag)
#endif
{
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime();
#endif
#ifdef _LMP_INTEL_OFFLOAD
overflow[LMP_LOCAL_MIN] = 0;
overflow[LMP_LOCAL_MAX] = aend - 1;
overflow[LMP_GHOST_MIN] = e_nall;
overflow[LMP_GHOST_MAX] = -1;
#endif
int nstencilp = 0;
int binstart[INTEL_MAX_STENCIL], binend[INTEL_MAX_STENCIL];
for (int k = 0; k < nstencil; k++) {
binstart[nstencilp] = stencil[k];
int end = stencil[k] + 1;
for (int kk = k + 1; kk < nstencil; kk++) {
if (stencil[kk-1]+1 == stencil[kk]) {
end++;
k++;
} else break;
}
binend[nstencilp] = end;
nstencilp++;
}
const int mbinyx = mbiny * mbinx;
#if defined(_OPENMP)
#pragma omp parallel
#endif
{
const int num = aend;
int tid, ifrom, ito;
const double balance_factor = 2.0;
const double ibalance_factor = 1.0 / balance_factor;
const int gnum = num - nlocal;
const int wlocal = static_cast<int>(ceil(balance_factor * nlocal));
const int snum = wlocal + gnum;
IP_PRE_omp_range_id(ifrom, ito, tid, snum, nthreads);
if (ifrom < wlocal) ifrom = static_cast<int>(ibalance_factor * ifrom);
else ifrom -= wlocal - nlocal;
if (ito < wlocal) ito = static_cast<int>(ibalance_factor * ito);
else ito -= wlocal - nlocal;
int e_ito = ito;
const int list_size = (e_ito + tid * 2 + 2) * maxnbors;
int which;
int pack_offset = maxnbors;
int ct = (ifrom + tid * 2) * maxnbors;
int *neighptr = firstneigh + ct;
const int obound = pack_offset + maxnbors * 2;
const int toffs = tid * ncache_stride;
flt_t * _noalias const tx = ncachex + toffs;
flt_t * _noalias const ty = ncachey + toffs;
flt_t * _noalias const tz = ncachez + toffs;
int * _noalias const tj = ncachej + toffs;
int * _noalias const tjtype = ncachejtype + toffs;
int * _noalias const ttag = ncachetag + toffs;
// loop over all atoms in other bins in stencil, store every pair
int istart, icount, ncount, oldbin = -9999999, lane, max_chunk;
for (int i = ifrom; i < ito; i++) {
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
const int itype = x[i].w;
const tagint itag = tag[i];
const int ioffset = ntypes * itype;
const int ibin = atombin[i];
if (ibin != oldbin) {
oldbin = ibin;
ncount = 0;
if (i < nlocal) {
for (int k = 0; k < nstencilp; k++) {
const int bstart = binhead[ibin + binstart[k]];
const int bend = binhead[ibin + binend[k]];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int jj = bstart; jj < bend; jj++)
tj[ncount++] = binpacked[jj];
}
} else {
const int zbin = ibin / mbinyx;
const int zrem = ibin % mbinyx;
const int ybin = zrem / mbinx;
const int xbin = zrem % mbinx;
for (int k = 0; k < nstencil; k++) {
const int xbin2 = xbin + stencilxyz[3 * k + 0];
const int ybin2 = ybin + stencilxyz[3 * k + 1];
const int zbin2 = zbin + stencilxyz[3 * k + 2];
if (xbin2 < 0 || xbin2 >= mbinx ||
ybin2 < 0 || ybin2 >= mbiny ||
zbin2 < 0 || zbin2 >= mbinz) continue;
const int bstart = binhead[ibin + stencil[k]];
const int bend = binhead[ibin + stencil[k] + 1];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int jj = bstart; jj < bend; jj++)
tj[ncount++] = binpacked[jj];
}
} // if i < nlocal
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int u = 0; u < ncount; u++) {
const int j = tj[u];
tx[u] = x[j].x;
ty[u] = x[j].y;
tz[u] = x[j].z;
tjtype[u] = x[j].w;
ttag[u] = tag[j];
}
} // if ibin != oldbin
// ---------------------- Loop over other bins
int n = maxnbors;
int n2 = n * 2;
int *neighptr2 = neighptr;
const flt_t * _noalias cutsq;
if (i < nlocal) cutsq = cutneighsq;
else cutsq = cutneighghostsq;
const int icp = i;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int u = 0; u < ncount; u++) {
int addme = 1;
int j = tj[u];
if (i == j) addme = 0;
// Cutoff Check
const flt_t delx = xtmp - tx[u];
const flt_t dely = ytmp - ty[u];
const flt_t delz = ztmp - tz[u];
const int jtype = tjtype[u];
const int jtag = ttag[u];
const flt_t rsq = delx * delx + dely * dely + delz * delz;
if (rsq > cutsq[ioffset + jtype]) addme = 0;
if (need_ic && icp < nlocal) {
int no_special;
ominimum_image_check(no_special, delx, dely, delz);
if (no_special)
j = -j - 1;
}
int flist = 0;
if (itag > jtag) {
if (((itag+jtag) & 1) == 0) flist = 1;
} else if (itag < jtag) {
if (((itag+jtag) & 1) == 1) flist = 1;
} else {
if (tz[u] < ztmp) flist = 1;
else if (tz[u] == ztmp && ty[u] < ytmp) flist = 1;
else if (tz[u] == ztmp && ty[u] == ytmp && tx[u] < xtmp)
flist = 1;
}
if (addme) {
if (flist)
neighptr2[n2++] = j;
else
neighptr[n++] = j;
}
} // for u
#ifndef _LMP_INTEL_OFFLOAD
if (exclude) {
int alln = n;
n = maxnbors;
for (int u = pack_offset; u < alln; u++) {
const int j = neighptr[u];
int pj = j;
if (need_ic)
if (pj < 0) pj = -j - 1;
const int jtype = x[pj].w;
if (exclusion(i,pj,itype,jtype,mask,molecule)) continue;
neighptr[n++] = j;
}
alln = n2;
n2 = maxnbors * 2;
for (int u = n2; u < alln; u++) {
const int j = neighptr[u];
int pj = j;
if (need_ic)
if (pj < 0) pj = -j - 1;
const int jtype = x[pj].w;
if (exclusion(i,pj,itype,jtype,mask,molecule)) continue;
neighptr[n2++] = j;
}
}
#endif
int ns = n - maxnbors;
int alln = n;
atombin[i] = ns;
n = 0;
for (int u = maxnbors; u < alln; u++)
neighptr[n++] = neighptr[u];
ns += n2 - maxnbors * 2;
for (int u = maxnbors * 2; u < n2; u++)
neighptr[n++] = neighptr[u];
if (ns > maxnbors) *overflow = 1;
ilist[i] = i;
cnumneigh[i] = ct;
numneigh[i] = ns;
ct += ns;
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
const int edge = ct & (alignb - 1);
if (edge) ct += alignb - edge;
neighptr = firstneigh + ct;
if (ct + obound > list_size) {
if (i < ito - 1) {
*overflow = 1;
ct = (ifrom + tid * 2) * maxnbors;
}
}
}
if (*overflow == 1)
for (int i = ifrom; i < ito; i++)
numneigh[i] = 0;
#ifdef _LMP_INTEL_OFFLOAD
int ghost_offset = 0, nall_offset = e_nall;
if (separate_buffers) {
for (int i = ifrom; i < ito; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
#if __INTEL_COMPILER+0 > 1499
#pragma vector aligned
#pragma simd
#endif
for (int jj = 0; jj < jnum; jj++) {
int j = jlist[jj];
if (need_ic && j < 0) j = -j - 1;
}
}
overflow[LMP_LOCAL_MIN] = 0;
overflow[LMP_LOCAL_MAX] = nlocal - 1;
overflow[LMP_GHOST_MIN] = nlocal;
overflow[LMP_GHOST_MAX] = e_nall - 1;
int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN];
if (nghost < 0) nghost = 0;
if (offload) {
ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1;
nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost;
} else {
ghost_offset = overflow[LMP_GHOST_MIN] - nlocal;
nall_offset = nlocal + nghost;
}
} // if separate_buffers
#endif
if (molecular) {
int ito_m = ito;
if (ito >= nlocal) ito_m = nlocal;
for (int i = ifrom; i < ito_m; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd
#endif
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj];
if (need_ic && j < 0) {
which = 0;
jlist[jj] = -j - 1;
} else
ofind_special(which, special, nspecial, i, tag[j]);
#ifdef _LMP_INTEL_OFFLOAD
if (j >= nlocal) {
if (j == e_nall)
jlist[jj] = nall_offset;
else if (which)
jlist[jj] = (j-ghost_offset) ^ (which << SBBITS);
else jlist[jj]-=ghost_offset;
} else
#endif
if (which) jlist[jj] = j ^ (which << SBBITS);
}
} // for i
} // if molecular
#ifdef _LMP_INTEL_OFFLOAD
else if (separate_buffers) {
for (int i = ifrom; i < ito; ++i) {
int * _noalias jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
int jj = 0;
#pragma vector aligned
#pragma simd
for (jj = 0; jj < jnum; jj++) {
if (jlist[jj] >= nlocal) {
if (jlist[jj] == e_nall) jlist[jj] = nall_offset;
else jlist[jj] -= ghost_offset;
}
}
}
}
#endif
} // end omp
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime() - *timer_compute;
#endif
} // end offload
#ifdef _LMP_INTEL_OFFLOAD
if (offload) {
_fix->stop_watch(TIME_OFFLOAD_LATENCY);
_fix->start_watch(TIME_HOST_NEIGHBOR);
for (int n = 0; n < aend; n++) {
ilist[n] = n;
numneigh[n] = 0;
}
} else {
for (int i = 0; i < aend; i++)
list->firstneigh[i] = firstneigh + cnumneigh[i];
if (separate_buffers) {
_fix->start_watch(TIME_PACK);
_fix->set_neighbor_host_sizes();
buffers->pack_sep_from_single(_fix->host_min_local(),
_fix->host_used_local(),
_fix->host_min_ghost(),
_fix->host_used_ghost());
_fix->stop_watch(TIME_PACK);
}
}
#else
#pragma vector aligned
#pragma simd
for (int i = 0; i < aend; i++)
list->firstneigh[i] = firstneigh + cnumneigh[i];
#endif
}

View File

@ -0,0 +1,55 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#ifdef NPAIR_CLASS
NPairStyle(full/bin/ghost/intel,
NPairFullBinGhostIntel,
NP_FULL | NP_BIN | NP_GHOST | NP_NEWTON | NP_NEWTOFF |
NP_ORTHO | NP_TRI | NP_INTEL)
#else
#ifndef LMP_NPAIR_FULL_BIN_GHOST_INTEL_H
#define LMP_NPAIR_FULL_BIN_GHOST_INTEL_H
#include "npair_intel.h"
namespace LAMMPS_NS {
class NPairFullBinGhostIntel : public NPairIntel {
public:
NPairFullBinGhostIntel(class LAMMPS *);
~NPairFullBinGhostIntel() {}
void build(class NeighList *);
private:
template<class flt_t, class acc_t>
void fbi(NeighList * list, IntelBuffers<flt_t,acc_t> * buffers);
template<class flt_t, class acc_t, int need_ic>
void fbi(const int offload, NeighList * list,
IntelBuffers<flt_t,acc_t> * buffers,
const int astart, const int aend);
};
}
#endif
#endif
/* ERROR/WARNING messages:
*/

View File

@ -143,6 +143,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
flt_t * _noalias const ncachez = buffers->get_ncachez(); flt_t * _noalias const ncachez = buffers->get_ncachez();
int * _noalias const ncachej = buffers->get_ncachej(); int * _noalias const ncachej = buffers->get_ncachej();
int * _noalias const ncachejtype = buffers->get_ncachejtype(); int * _noalias const ncachejtype = buffers->get_ncachejtype();
int * _noalias const ncachetag = buffers->get_ncachetag();
const int ncache_stride = buffers->ncache_stride(); const int ncache_stride = buffers->ncache_stride();
#ifdef _LMP_INTEL_OFFLOAD #ifdef _LMP_INTEL_OFFLOAD
@ -165,7 +166,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
in(atombin:length(aend) alloc_if(0) free_if(0)) \ in(atombin:length(aend) alloc_if(0) free_if(0)) \
in(stencil:length(nstencil) alloc_if(0) free_if(0)) \ in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
in(ncachex,ncachey,ncachez,ncachej:length(0) alloc_if(0) free_if(0)) \ in(ncachex,ncachey,ncachez,ncachej:length(0) alloc_if(0) free_if(0)) \
in(ncachejtype:length(0) alloc_if(0) free_if(0)) \ in(ncachejtype,ncachetag:length(0) alloc_if(0) free_if(0)) \
in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \ in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \
in(pad_width,offload_end,separate_buffers,astart,aend,nlocal,molecular) \ in(pad_width,offload_end,separate_buffers,astart,aend,nlocal,molecular) \
in(ntypes,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \ in(ntypes,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \
@ -222,7 +223,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
ito += astart; ito += astart;
int e_ito = ito; int e_ito = ito;
if (THREE && ito == num) { if (THREE && ito == num) {
int imod = ito % pack_width; int imod = ito & (pack_width - 1);
if (imod) e_ito += pack_width - imod; if (imod) e_ito += pack_width - imod;
} }
const int list_size = (e_ito + tid * 2 + 2) * maxnbors; const int list_size = (e_ito + tid * 2 + 2) * maxnbors;
@ -241,6 +242,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
flt_t * _noalias const tz = ncachez + toffs; flt_t * _noalias const tz = ncachez + toffs;
int * _noalias const tj = ncachej + toffs; int * _noalias const tj = ncachej + toffs;
int * _noalias const tjtype = ncachejtype + toffs; int * _noalias const tjtype = ncachejtype + toffs;
int * _noalias const ttag = ncachetag + toffs;
flt_t * _noalias itx; flt_t * _noalias itx;
flt_t * _noalias ity; flt_t * _noalias ity;
@ -287,13 +289,14 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
ty[u] = x[j].y; ty[u] = x[j].y;
tz[u] = x[j].z; tz[u] = x[j].z;
tjtype[u] = x[j].w; tjtype[u] = x[j].w;
if (THREE) ttag[u] = tag[j];
} }
if (FULL == 0 || TRI == 1) { if (FULL == 0 || TRI == 1) {
icount = 0; icount = 0;
istart = ncount; istart = ncount;
const int alignb = INTEL_DATA_ALIGN / sizeof(int); const int alignb = INTEL_DATA_ALIGN / sizeof(int);
int nedge = istart % alignb; int nedge = istart & (alignb - 1);
if (nedge) istart + (alignb - nedge); if (nedge) istart + (alignb - nedge);
itx = tx + istart; itx = tx + istart;
ity = ty + istart; ity = ty + istart;
@ -343,7 +346,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
// i bin (half) check and offload ghost check // i bin (half) check and offload ghost check
if (j < nlocal) { if (j < nlocal) {
const int ijmod = (i + j) % 2; const int ijmod = (i + j) & 1;
if (i > j) { if (i > j) {
if (ijmod == 0) addme = 0; if (ijmod == 0) addme = 0;
} else if (i < j) { } else if (i < j) {
@ -424,8 +427,6 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
} }
#endif #endif
int pj;
if (THREE) pj = j;
if (need_ic) { if (need_ic) {
int no_special; int no_special;
ominimum_image_check(no_special, delx, dely, delz); ominimum_image_check(no_special, delx, dely, delz);
@ -434,12 +435,12 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
} }
if (THREE) { if (THREE) {
const int jtag = tag[pj]; const int jtag = ttag[u];
int flist = 0; int flist = 0;
if (itag > jtag) { if (itag > jtag) {
if ((itag+jtag) % 2 == 0) flist = 1; if (((itag+jtag) & 1) == 0) flist = 1;
} else if (itag < jtag) { } else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) flist = 1; if (((itag+jtag) & 1) == 1) flist = 1;
} else { } else {
if (tz[u] < ztmp) flist = 1; if (tz[u] < ztmp) flist = 1;
else if (tz[u] == ztmp && ty[u] < ytmp) flist = 1; else if (tz[u] == ztmp && ty[u] < ytmp) flist = 1;
@ -512,7 +513,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
cnumneigh[i] += lane; cnumneigh[i] += lane;
numneigh[i] = ns; numneigh[i] = ns;
} else { } else {
int edge = (n % pad_width); int edge = n & (pad_width - 1);
if (edge) { if (edge) {
const int pad_end = n + (pad_width - edge); const int pad_end = n + (pad_width - edge);
#if defined(LMP_SIMD_COMPILER) #if defined(LMP_SIMD_COMPILER)
@ -532,7 +533,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
if (lane == pack_width) { if (lane == pack_width) {
ct += max_chunk * pack_width; ct += max_chunk * pack_width;
const int alignb = (INTEL_DATA_ALIGN / sizeof(int)); const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
const int edge = (ct % alignb); const int edge = ct & (alignb - 1);
if (edge) ct += alignb - edge; if (edge) ct += alignb - edge;
neighptr = firstneigh + ct; neighptr = firstneigh + ct;
max_chunk = 0; max_chunk = 0;
@ -548,7 +549,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
} else { } else {
ct += n; ct += n;
const int alignb = (INTEL_DATA_ALIGN / sizeof(int)); const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
const int edge = (ct % alignb); const int edge = ct & (alignb - 1);
if (edge) ct += alignb - edge; if (edge) ct += alignb - edge;
neighptr = firstneigh + ct; neighptr = firstneigh + ct;
if (ct + obound > list_size) { if (ct + obound > list_size) {

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,110 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Markus Hohnerbach (RWTH)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(airebo/intel,PairAIREBOIntel)
#else
#ifndef LMP_PAIR_AIREBO_INTEL_H
#define LMP_PAIR_AIREBO_INTEL_H
#include "pair.h"
#include "fix_intel.h"
#include "pair_airebo.h"
//#include "airebo_common.h"
namespace LAMMPS_NS {
template<class flt_t, class acc_t>
struct PairAIREBOIntelParam;
class PairAIREBOIntel : public PairAIREBO {
public:
PairAIREBOIntel(class LAMMPS *);
virtual ~PairAIREBOIntel();
virtual void compute(int, int);
virtual void init_style();
protected:
template <class flt_t, class acc_t>
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers);
template <int EVFLAG, int EFLAG, class flt_t, class acc_t>
void eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> * buffers,
const int astart, const int aend);
template <class flt_t, class acc_t>
void pack_force_const(IntelBuffers<flt_t,acc_t> * buffers);
template <class flt_t, class acc_t>
PairAIREBOIntelParam<flt_t,acc_t> get_param();
FixIntel * fix;
int _cop;
int * REBO_cnumneigh;
int * REBO_num_skin;
int * REBO_list_data;
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Incorrect args for pair coefficients
Self-explanatory. Check the input script or data file.
E: Pair style AIREBO requires atom IDs
This is a requirement to use the AIREBO potential.
E: Pair style AIREBO requires newton pair on
See the newton command. This is a restriction to use the AIREBO
potential.
E: All pair coeffs are not set
All pair coefficients must be set in the data file or by the
pair_coeff command before running a simulation.
E: Neighbor list overflow, boost neigh_modify one
There are too many neighbors of a single atom. Use the neigh_modify
command to increase the max number of neighbors allowed for one atom.
You may also want to boost the page size.
E: Cannot open AIREBO potential file %s
The specified AIREBO potential file cannot be opened. Check that the
path and name are correct.
*/

View File

@ -0,0 +1,37 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Markus Hohnerbach (RWTH)
------------------------------------------------------------------------- */
#include "pair_airebo_morse_intel.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
PairAIREBOMorseIntel::PairAIREBOMorseIntel(LAMMPS *lmp)
: PairAIREBOIntel(lmp) {}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
void PairAIREBOMorseIntel::settings(int narg, char **arg)
{
PairAIREBOIntel::settings(narg,arg);
morseflag = 1;
}

View File

@ -0,0 +1,40 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Markus Hohnerbach (RWTH)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(airebo/morse/intel,PairAIREBOMorseIntel)
#else
#ifndef LMP_PAIR_AIREBO_MORSE_INTEL_H
#define LMP_PAIR_AIREBO_MORSE_INTEL_H
#include "pair_airebo_intel.h"
namespace LAMMPS_NS {
class PairAIREBOMorseIntel : public PairAIREBOIntel {
public:
PairAIREBOMorseIntel(class LAMMPS *);
virtual void settings(int, char **);
};
}
#endif
#endif

View File

@ -0,0 +1,326 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Stephen Foiles (SNL), Murray Daw (SNL)
------------------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pair_eam_alloy_intel.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
#define MAXLINE 1024
/* ---------------------------------------------------------------------- */
PairEAMAlloyIntel::PairEAMAlloyIntel(LAMMPS *lmp) : PairEAMIntel(lmp)
{
one_coeff = 1;
}
/* ----------------------------------------------------------------------
set coeffs for one or more type pairs
read DYNAMO setfl file
------------------------------------------------------------------------- */
void PairEAMAlloyIntel::coeff(int narg, char **arg)
{
int i,j;
if (!allocated) allocate();
if (narg != 3 + atom->ntypes)
error->all(FLERR,"Incorrect args for pair coefficients");
// insure I,J args are * *
if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
error->all(FLERR,"Incorrect args for pair coefficients");
// read EAM setfl file
if (setfl) {
for (i = 0; i < setfl->nelements; i++) delete [] setfl->elements[i];
delete [] setfl->elements;
delete [] setfl->mass;
memory->destroy(setfl->frho);
memory->destroy(setfl->rhor);
memory->destroy(setfl->z2r);
delete setfl;
}
setfl = new Setfl();
read_file(arg[2]);
// read args that map atom types to elements in potential file
// map[i] = which element the Ith atom type is, -1 if NULL
for (i = 3; i < narg; i++) {
if (strcmp(arg[i],"NULL") == 0) {
map[i-2] = -1;
continue;
}
for (j = 0; j < setfl->nelements; j++)
if (strcmp(arg[i],setfl->elements[j]) == 0) break;
if (j < setfl->nelements) map[i-2] = j;
else error->all(FLERR,"No matching element in EAM potential file");
}
// clear setflag since coeff() called once with I,J = * *
int n = atom->ntypes;
for (i = 1; i <= n; i++)
for (j = i; j <= n; j++)
setflag[i][j] = 0;
// set setflag i,j for type pairs where both are mapped to elements
// set mass of atom type if i = j
int count = 0;
for (i = 1; i <= n; i++) {
for (j = i; j <= n; j++) {
if (map[i] >= 0 && map[j] >= 0) {
setflag[i][j] = 1;
if (i == j) atom->set_mass(FLERR,i,setfl->mass[map[i]]);
count++;
}
scale[i][j] = 1.0;
}
}
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
}
/* ----------------------------------------------------------------------
read a multi-element DYNAMO setfl file
------------------------------------------------------------------------- */
void PairEAMAlloyIntel::read_file(char *filename)
{
Setfl *file = setfl;
// open potential file
int me = comm->me;
FILE *fptr;
char line[MAXLINE];
if (me == 0) {
fptr = force->open_potential(filename);
if (fptr == NULL) {
char str[128];
sprintf(str,"Cannot open EAM potential file %s",filename);
error->one(FLERR,str);
}
}
// read and broadcast header
// extract element names from nelements line
int n;
if (me == 0) {
fgets(line,MAXLINE,fptr);
fgets(line,MAXLINE,fptr);
fgets(line,MAXLINE,fptr);
fgets(line,MAXLINE,fptr);
n = strlen(line) + 1;
}
MPI_Bcast(&n,1,MPI_INT,0,world);
MPI_Bcast(line,n,MPI_CHAR,0,world);
sscanf(line,"%d",&file->nelements);
int nwords = atom->count_words(line);
if (nwords != file->nelements + 1)
error->all(FLERR,"Incorrect element names in EAM potential file");
char **words = new char*[file->nelements+1];
nwords = 0;
strtok(line," \t\n\r\f");
while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
file->elements = new char*[file->nelements];
for (int i = 0; i < file->nelements; i++) {
n = strlen(words[i]) + 1;
file->elements[i] = new char[n];
strcpy(file->elements[i],words[i]);
}
delete [] words;
if (me == 0) {
fgets(line,MAXLINE,fptr);
sscanf(line,"%d %lg %d %lg %lg",
&file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
}
MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
MPI_Bcast(&file->nr,1,MPI_INT,0,world);
MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
file->mass = new double[file->nelements];
memory->create(file->frho,file->nelements,file->nrho+1,"pair:frho");
memory->create(file->rhor,file->nelements,file->nr+1,"pair:rhor");
memory->create(file->z2r,file->nelements,file->nelements,file->nr+1,
"pair:z2r");
int i,j,tmp;
for (i = 0; i < file->nelements; i++) {
if (me == 0) {
fgets(line,MAXLINE,fptr);
sscanf(line,"%d %lg",&tmp,&file->mass[i]);
}
MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
if (me == 0) grab(fptr,file->nr,&file->rhor[i][1]);
MPI_Bcast(&file->rhor[i][1],file->nr,MPI_DOUBLE,0,world);
}
for (i = 0; i < file->nelements; i++)
for (j = 0; j <= i; j++) {
if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
}
// close the potential file
if (me == 0) fclose(fptr);
}
/* ----------------------------------------------------------------------
copy read-in setfl potential to standard array format
------------------------------------------------------------------------- */
void PairEAMAlloyIntel::file2array()
{
int i,j,m,n;
int ntypes = atom->ntypes;
// set function params directly from setfl file
nrho = setfl->nrho;
nr = setfl->nr;
drho = setfl->drho;
dr = setfl->dr;
rhomax = (nrho-1) * drho;
// ------------------------------------------------------------------
// setup frho arrays
// ------------------------------------------------------------------
// allocate frho arrays
// nfrho = # of setfl elements + 1 for zero array
nfrho = setfl->nelements + 1;
memory->destroy(frho);
memory->create(frho,nfrho,nrho+1,"pair:frho");
// copy each element's frho to global frho
for (i = 0; i < setfl->nelements; i++)
for (m = 1; m <= nrho; m++) frho[i][m] = setfl->frho[i][m];
// add extra frho of zeroes for non-EAM types to point to (pair hybrid)
// this is necessary b/c fp is still computed for non-EAM atoms
for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
// type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
// if atom type doesn't point to element (non-EAM atom in pair hybrid)
// then map it to last frho array of zeroes
for (i = 1; i <= ntypes; i++)
if (map[i] >= 0) type2frho[i] = map[i];
else type2frho[i] = nfrho-1;
// ------------------------------------------------------------------
// setup rhor arrays
// ------------------------------------------------------------------
// allocate rhor arrays
// nrhor = # of setfl elements
nrhor = setfl->nelements;
memory->destroy(rhor);
memory->create(rhor,nrhor,nr+1,"pair:rhor");
// copy each element's rhor to global rhor
for (i = 0; i < setfl->nelements; i++)
for (m = 1; m <= nr; m++) rhor[i][m] = setfl->rhor[i][m];
// type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
// for setfl files, I,J mapping only depends on I
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
for (i = 1; i <= ntypes; i++)
for (j = 1; j <= ntypes; j++)
type2rhor[i][j] = map[i];
// ------------------------------------------------------------------
// setup z2r arrays
// ------------------------------------------------------------------
// allocate z2r arrays
// nz2r = N*(N+1)/2 where N = # of setfl elements
nz2r = setfl->nelements * (setfl->nelements+1) / 2;
memory->destroy(z2r);
memory->create(z2r,nz2r,nr+1,"pair:z2r");
// copy each element pair z2r to global z2r, only for I >= J
n = 0;
for (i = 0; i < setfl->nelements; i++)
for (j = 0; j <= i; j++) {
for (m = 1; m <= nr; m++) z2r[n][m] = setfl->z2r[i][j][m];
n++;
}
// type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
// set of z2r arrays only fill lower triangular Nelement matrix
// value = n = sum over rows of lower-triangular matrix until reach irow,icol
// swap indices when irow < icol to stay lower triangular
// if map = -1 (non-EAM atom in pair hybrid):
// type2z2r is not used by non-opt
// but set type2z2r to 0 since accessed by opt
int irow,icol;
for (i = 1; i <= ntypes; i++) {
for (j = 1; j <= ntypes; j++) {
irow = map[i];
icol = map[j];
if (irow == -1 || icol == -1) {
type2z2r[i][j] = 0;
continue;
}
if (irow < icol) {
irow = map[j];
icol = map[i];
}
n = 0;
for (m = 0; m < irow; m++) n += m + 1;
n += icol;
type2z2r[i][j] = n;
}
}
}

View File

@ -0,0 +1,43 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(eam/alloy/intel,PairEAMAlloyIntel)
#else
#ifndef LMP_PAIR_EAM_ALLOY_INTEL_H
#define LMP_PAIR_EAM_ALLOY_INTEL_H
#include "pair_eam_intel.h"
namespace LAMMPS_NS {
// need virtual public b/c of how eam/alloy/opt inherits from it
class PairEAMAlloyIntel : virtual public PairEAMIntel {
public:
PairEAMAlloyIntel(class LAMMPS *);
virtual ~PairEAMAlloyIntel() {}
void coeff(int, char **);
protected:
void read_file(char *);
void file2array();
};
}
#endif
#endif

View File

@ -0,0 +1,335 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: Tim Lau (MIT)
------------------------------------------------------------------------- */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "pair_eam_fs_intel.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
#define MAXLINE 1024
/* ---------------------------------------------------------------------- */
PairEAMFSIntel::PairEAMFSIntel(LAMMPS *lmp) : PairEAMIntel(lmp)
{
one_coeff = 1;
}
/* ----------------------------------------------------------------------
set coeffs for one or more type pairs
read EAM Finnis-Sinclair file
------------------------------------------------------------------------- */
void PairEAMFSIntel::coeff(int narg, char **arg)
{
int i,j;
if (!allocated) allocate();
if (narg != 3 + atom->ntypes)
error->all(FLERR,"Incorrect args for pair coefficients");
// insure I,J args are * *
if (strcmp(arg[0],"*") != 0 || strcmp(arg[1],"*") != 0)
error->all(FLERR,"Incorrect args for pair coefficients");
// read EAM Finnis-Sinclair file
if (fs) {
for (i = 0; i < fs->nelements; i++) delete [] fs->elements[i];
delete [] fs->elements;
delete [] fs->mass;
memory->destroy(fs->frho);
memory->destroy(fs->rhor);
memory->destroy(fs->z2r);
delete fs;
}
fs = new Fs();
read_file(arg[2]);
// read args that map atom types to elements in potential file
// map[i] = which element the Ith atom type is, -1 if NULL
for (i = 3; i < narg; i++) {
if (strcmp(arg[i],"NULL") == 0) {
map[i-2] = -1;
continue;
}
for (j = 0; j < fs->nelements; j++)
if (strcmp(arg[i],fs->elements[j]) == 0) break;
if (j < fs->nelements) map[i-2] = j;
else error->all(FLERR,"No matching element in EAM potential file");
}
// clear setflag since coeff() called once with I,J = * *
int n = atom->ntypes;
for (i = 1; i <= n; i++)
for (j = i; j <= n; j++)
setflag[i][j] = 0;
// set setflag i,j for type pairs where both are mapped to elements
// set mass of atom type if i = j
int count = 0;
for (i = 1; i <= n; i++) {
for (j = i; j <= n; j++) {
if (map[i] >= 0 && map[j] >= 0) {
setflag[i][j] = 1;
if (i == j) atom->set_mass(FLERR,i,fs->mass[map[i]]);
count++;
}
scale[i][j] = 1.0;
}
}
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
}
/* ----------------------------------------------------------------------
read a multi-element DYNAMO setfl file
------------------------------------------------------------------------- */
void PairEAMFSIntel::read_file(char *filename)
{
Fs *file = fs;
// open potential file
int me = comm->me;
FILE *fptr;
char line[MAXLINE];
if (me == 0) {
fptr = force->open_potential(filename);
if (fptr == NULL) {
char str[128];
sprintf(str,"Cannot open EAM potential file %s",filename);
error->one(FLERR,str);
}
}
// read and broadcast header
// extract element names from nelements line
int n;
if (me == 0) {
fgets(line,MAXLINE,fptr);
fgets(line,MAXLINE,fptr);
fgets(line,MAXLINE,fptr);
fgets(line,MAXLINE,fptr);
n = strlen(line) + 1;
}
MPI_Bcast(&n,1,MPI_INT,0,world);
MPI_Bcast(line,n,MPI_CHAR,0,world);
sscanf(line,"%d",&file->nelements);
int nwords = atom->count_words(line);
if (nwords != file->nelements + 1)
error->all(FLERR,"Incorrect element names in EAM potential file");
char **words = new char*[file->nelements+1];
nwords = 0;
strtok(line," \t\n\r\f");
while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
file->elements = new char*[file->nelements];
for (int i = 0; i < file->nelements; i++) {
n = strlen(words[i]) + 1;
file->elements[i] = new char[n];
strcpy(file->elements[i],words[i]);
}
delete [] words;
if (me == 0) {
fgets(line,MAXLINE,fptr);
sscanf(line,"%d %lg %d %lg %lg",
&file->nrho,&file->drho,&file->nr,&file->dr,&file->cut);
}
MPI_Bcast(&file->nrho,1,MPI_INT,0,world);
MPI_Bcast(&file->drho,1,MPI_DOUBLE,0,world);
MPI_Bcast(&file->nr,1,MPI_INT,0,world);
MPI_Bcast(&file->dr,1,MPI_DOUBLE,0,world);
MPI_Bcast(&file->cut,1,MPI_DOUBLE,0,world);
file->mass = new double[file->nelements];
memory->create(file->frho,file->nelements,file->nrho+1,
"pair:frho");
memory->create(file->rhor,file->nelements,file->nelements,
file->nr+1,"pair:rhor");
memory->create(file->z2r,file->nelements,file->nelements,
file->nr+1,"pair:z2r");
int i,j,tmp;
for (i = 0; i < file->nelements; i++) {
if (me == 0) {
fgets(line,MAXLINE,fptr);
sscanf(line,"%d %lg",&tmp,&file->mass[i]);
}
MPI_Bcast(&file->mass[i],1,MPI_DOUBLE,0,world);
if (me == 0) grab(fptr,file->nrho,&file->frho[i][1]);
MPI_Bcast(&file->frho[i][1],file->nrho,MPI_DOUBLE,0,world);
for (j = 0; j < file->nelements; j++) {
if (me == 0) grab(fptr,file->nr,&file->rhor[i][j][1]);
MPI_Bcast(&file->rhor[i][j][1],file->nr,MPI_DOUBLE,0,world);
}
}
for (i = 0; i < file->nelements; i++)
for (j = 0; j <= i; j++) {
if (me == 0) grab(fptr,file->nr,&file->z2r[i][j][1]);
MPI_Bcast(&file->z2r[i][j][1],file->nr,MPI_DOUBLE,0,world);
}
// close the potential file
if (me == 0) fclose(fptr);
}
/* ----------------------------------------------------------------------
copy read-in setfl potential to standard array format
------------------------------------------------------------------------- */
void PairEAMFSIntel::file2array()
{
int i,j,m,n;
int ntypes = atom->ntypes;
// set function params directly from fs file
nrho = fs->nrho;
nr = fs->nr;
drho = fs->drho;
dr = fs->dr;
rhomax = (nrho-1) * drho;
// ------------------------------------------------------------------
// setup frho arrays
// ------------------------------------------------------------------
// allocate frho arrays
// nfrho = # of fs elements + 1 for zero array
nfrho = fs->nelements + 1;
memory->destroy(frho);
memory->create(frho,nfrho,nrho+1,"pair:frho");
// copy each element's frho to global frho
for (i = 0; i < fs->nelements; i++)
for (m = 1; m <= nrho; m++) frho[i][m] = fs->frho[i][m];
// add extra frho of zeroes for non-EAM types to point to (pair hybrid)
// this is necessary b/c fp is still computed for non-EAM atoms
for (m = 1; m <= nrho; m++) frho[nfrho-1][m] = 0.0;
// type2frho[i] = which frho array (0 to nfrho-1) each atom type maps to
// if atom type doesn't point to element (non-EAM atom in pair hybrid)
// then map it to last frho array of zeroes
for (i = 1; i <= ntypes; i++)
if (map[i] >= 0) type2frho[i] = map[i];
else type2frho[i] = nfrho-1;
// ------------------------------------------------------------------
// setup rhor arrays
// ------------------------------------------------------------------
// allocate rhor arrays
// nrhor = square of # of fs elements
nrhor = fs->nelements * fs->nelements;
memory->destroy(rhor);
memory->create(rhor,nrhor,nr+1,"pair:rhor");
// copy each element pair rhor to global rhor
n = 0;
for (i = 0; i < fs->nelements; i++)
for (j = 0; j < fs->nelements; j++) {
for (m = 1; m <= nr; m++) rhor[n][m] = fs->rhor[i][j][m];
n++;
}
// type2rhor[i][j] = which rhor array (0 to nrhor-1) each type pair maps to
// for fs files, there is a full NxN set of rhor arrays
// OK if map = -1 (non-EAM atom in pair hybrid) b/c type2rhor not used
for (i = 1; i <= ntypes; i++)
for (j = 1; j <= ntypes; j++)
type2rhor[i][j] = map[i] * fs->nelements + map[j];
// ------------------------------------------------------------------
// setup z2r arrays
// ------------------------------------------------------------------
// allocate z2r arrays
// nz2r = N*(N+1)/2 where N = # of fs elements
nz2r = fs->nelements * (fs->nelements+1) / 2;
memory->destroy(z2r);
memory->create(z2r,nz2r,nr+1,"pair:z2r");
// copy each element pair z2r to global z2r, only for I >= J
n = 0;
for (i = 0; i < fs->nelements; i++)
for (j = 0; j <= i; j++) {
for (m = 1; m <= nr; m++) z2r[n][m] = fs->z2r[i][j][m];
n++;
}
// type2z2r[i][j] = which z2r array (0 to nz2r-1) each type pair maps to
// set of z2r arrays only fill lower triangular Nelement matrix
// value = n = sum over rows of lower-triangular matrix until reach irow,icol
// swap indices when irow < icol to stay lower triangular
// if map = -1 (non-EAM atom in pair hybrid):
// type2z2r is not used by non-opt
// but set type2z2r to 0 since accessed by opt
int irow,icol;
for (i = 1; i <= ntypes; i++) {
for (j = 1; j <= ntypes; j++) {
irow = map[i];
icol = map[j];
if (irow == -1 || icol == -1) {
type2z2r[i][j] = 0;
continue;
}
if (irow < icol) {
irow = map[j];
icol = map[i];
}
n = 0;
for (m = 0; m < irow; m++) n += m + 1;
n += icol;
type2z2r[i][j] = n;
}
}
}

View File

@ -0,0 +1,43 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(eam/fs/intel,PairEAMFSIntel)
#else
#ifndef LMP_PAIR_EAM_FS_INTEL_H
#define LMP_PAIR_EAM_FS_INTEL_H
#include "pair_eam_intel.h"
namespace LAMMPS_NS {
// need virtual public b/c of how eam/fs/opt inherits from it
class PairEAMFSIntel : virtual public PairEAMIntel {
public:
PairEAMFSIntel(class LAMMPS *);
virtual ~PairEAMFSIntel() {}
void coeff(int, char **);
protected:
void read_file(char *);
void file2array();
};
}
#endif
#endif

View File

@ -428,7 +428,7 @@ void PairGayBerneIntel::eval(const int offload, const int vflag,
} else } else
multiple_forms = true; multiple_forms = true;
} }
const int edge = (packed_j % pad_width); const int edge = packed_j & (pad_width - 1);
if (edge) { if (edge) {
const int packed_end = packed_j + (pad_width - edge); const int packed_end = packed_j + (pad_width - edge);
#if defined(LMP_SIMD_COMPILER) #if defined(LMP_SIMD_COMPILER)

View File

@ -0,0 +1,595 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#include <math.h>
#include "pair_lj_charmm_coul_charmm_intel.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "group.h"
#include "memory.h"
#include "modify.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "memory.h"
#include "suffix.h"
using namespace LAMMPS_NS;
#define LJ_T typename IntelBuffers<flt_t,flt_t>::vec4_t
/* ---------------------------------------------------------------------- */
PairLJCharmmCoulCharmmIntel::PairLJCharmmCoulCharmmIntel(LAMMPS *lmp) :
PairLJCharmmCoulCharmm(lmp)
{
suffix_flag |= Suffix::INTEL;
}
/* ---------------------------------------------------------------------- */
PairLJCharmmCoulCharmmIntel::~PairLJCharmmCoulCharmmIntel()
{
}
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmIntel::compute(int eflag, int vflag)
{
if (fix->precision()==FixIntel::PREC_MODE_MIXED)
compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
force_const_single);
else if (fix->precision()==FixIntel::PREC_MODE_DOUBLE)
compute<double,double>(eflag, vflag, fix->get_double_buffers(),
force_const_double);
else
compute<float,float>(eflag, vflag, fix->get_single_buffers(),
force_const_single);
fix->balance_stamp();
vflag_fdotr = 0;
}
template <class flt_t, class acc_t>
void PairLJCharmmCoulCharmmIntel::compute(int eflag, int vflag,
IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = vflag_fdotr = 0;
const int inum = list->inum;
const int nthreads = comm->nthreads;
const int host_start = fix->host_start_pair();
const int offload_end = fix->offload_end_pair();
const int ago = neighbor->ago;
if (ago != 0 && fix->separate_buffers() == 0) {
fix->start_watch(TIME_PACK);
int packthreads;
if (nthreads > INTEL_HTHREADS) packthreads = nthreads;
else packthreads = 1;
#if defined(_OPENMP)
#pragma omp parallel if(packthreads > 1)
#endif
{
int ifrom, ito, tid;
IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal+atom->nghost,
packthreads, sizeof(ATOM_T));
buffers->thr_pack(ifrom,ito,ago);
}
fix->stop_watch(TIME_PACK);
}
// -------------------- Regular version
int ovflag = 0;
if (vflag_fdotr) ovflag = 2;
else if (vflag) ovflag = 1;
if (eflag) {
if (force->newton_pair) {
eval<1,1>(1, ovflag, buffers, fc, 0, offload_end);
eval<1,1>(0, ovflag, buffers, fc, host_start, inum);
} else {
eval<1,0>(1, ovflag, buffers, fc, 0, offload_end);
eval<1,0>(0, ovflag, buffers, fc, host_start, inum);
}
} else {
if (force->newton_pair) {
eval<0,1>(1, ovflag, buffers, fc, 0, offload_end);
eval<0,1>(0, ovflag, buffers, fc, host_start, inum);
} else {
eval<0,0>(1, ovflag, buffers, fc, 0, offload_end);
eval<0,0>(0, ovflag, buffers, fc, host_start, inum);
}
}
}
/* ---------------------------------------------------------------------- */
template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
void PairLJCharmmCoulCharmmIntel::eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc,
const int astart, const int aend)
{
const int inum = aend - astart;
if (inum == 0) return;
int nlocal, nall, minlocal;
fix->get_buffern(offload, nlocal, nall, minlocal);
const int ago = neighbor->ago;
IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
ATOM_T * _noalias const x = buffers->get_x(offload);
flt_t * _noalias const q = buffers->get_q(offload);
const int * _noalias const numneigh = list->numneigh;
const int * _noalias const cnumneigh = buffers->cnumneigh(list);
const int * _noalias const firstneigh = buffers->firstneigh(list);
const flt_t * _noalias const special_coul = fc.special_coul;
const flt_t * _noalias const special_lj = fc.special_lj;
const flt_t qqrd2e = force->qqrd2e;
const flt_t inv_denom_lj = (flt_t)1.0/denom_lj;
const flt_t inv_denom_coul = (flt_t)1.0/denom_coul;
const flt_t * _noalias const cutsq = fc.cutsq[0];
const LJ_T * _noalias const lj = fc.lj[0];
const flt_t cut_ljsq = fc.cut_ljsq;
const flt_t cut_lj_innersq = fc.cut_lj_innersq;
const flt_t cut_coul_innersq = fc.cut_coul_innersq;
const flt_t cut_coulsq = fc.cut_coulsq;
const int ntypes = atom->ntypes + 1;
const int eatom = this->eflag_atom;
flt_t * _noalias const ccachex = buffers->get_ccachex();
flt_t * _noalias const ccachey = buffers->get_ccachey();
flt_t * _noalias const ccachez = buffers->get_ccachez();
flt_t * _noalias const ccachew = buffers->get_ccachew();
int * _noalias const ccachei = buffers->get_ccachei();
int * _noalias const ccachej = buffers->get_ccachej();
const int ccache_stride = _ccache_stride;
// Determine how much data to transfer
int x_size, q_size, f_stride, ev_size, separate_flag;
IP_PRE_get_transfern(ago, NEWTON_PAIR, EFLAG, vflag,
buffers, offload, fix, separate_flag,
x_size, q_size, ev_size, f_stride);
int tc;
FORCE_T * _noalias f_start;
acc_t * _noalias ev_global;
IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
const int nthreads = tc;
#ifdef _LMP_INTEL_OFFLOAD
int *overflow = fix->get_off_overflow_flag();
double *timer_compute = fix->off_watch_pair();
if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY);
#pragma offload target(mic:_cop) if(offload) \
in(special_lj,special_coul:length(0) alloc_if(0) free_if(0)) \
in(cutsq,lj:length(0) alloc_if(0) free_if(0)) \
in(firstneigh:length(0) alloc_if(0) free_if(0)) \
in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
in(numneigh:length(0) alloc_if(0) free_if(0)) \
in(x:length(x_size) alloc_if(0) free_if(0)) \
in(q:length(q_size) alloc_if(0) free_if(0)) \
in(overflow:length(0) alloc_if(0) free_if(0)) \
in(ccachex,ccachey,ccachez,ccachew:length(0) alloc_if(0) free_if(0)) \
in(ccachei,ccachej:length(0) alloc_if(0) free_if(0)) \
in(ccache_stride,nthreads,qqrd2e,inum,nall,ntypes,cut_coulsq) \
in(vflag,eatom,f_stride,separate_flag,offload) \
in(astart,cut_ljsq,cut_lj_innersq,nlocal,inv_denom_lj,minlocal) \
in(inv_denom_coul,cut_coul_innersq) \
out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(f_start)
#endif
{
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime();
#endif
IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
f_stride, x, q);
acc_t oevdwl, oecoul, ov0, ov1, ov2, ov3, ov4, ov5;
if (EFLAG) oevdwl = oecoul = (acc_t)0;
if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
// loop over neighbors of my atoms
#if defined(_OPENMP)
#pragma omp parallel reduction(+:oevdwl,oecoul,ov0,ov1,ov2,ov3,ov4,ov5)
#endif
{
int iifrom, iip, iito, tid;
IP_PRE_omp_stride_id(iifrom, iip, iito, tid, inum, nthreads);
iifrom += astart;
iito += astart;
int foff;
if (NEWTON_PAIR) foff = tid * f_stride - minlocal;
else foff = -minlocal;
FORCE_T * _noalias const f = f_start + foff;
if (NEWTON_PAIR) memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
flt_t cutboth = cut_coulsq;
const int toffs = tid * ccache_stride;
flt_t * _noalias const tdelx = ccachex + toffs;
flt_t * _noalias const tdely = ccachey + toffs;
flt_t * _noalias const tdelz = ccachez + toffs;
flt_t * _noalias const trsq = ccachew + toffs;
int * _noalias const tj = ccachei + toffs;
int * _noalias const tjtype = ccachej + toffs;
for (int i = iifrom; i < iito; i += iip) {
// const int i = ilist[ii];
const int itype = x[i].w;
const int ptr_off = itype * ntypes;
const flt_t * _noalias const cutsqi = cutsq + ptr_off;
const LJ_T * _noalias const lji = lj + ptr_off;
const int * _noalias const jlist = firstneigh + cnumneigh[i];
const int jnum = numneigh[i];
acc_t fxtmp,fytmp,fztmp,fwtmp;
acc_t sevdwl, secoul, sv0, sv1, sv2, sv3, sv4, sv5;
const flt_t xtmp = x[i].x;
const flt_t ytmp = x[i].y;
const flt_t ztmp = x[i].z;
const flt_t qtmp = q[i];
fxtmp = fytmp = fztmp = (acc_t)0;
if (EFLAG) fwtmp = sevdwl = secoul = (acc_t)0;
if (NEWTON_PAIR == 0)
if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
int ej = 0;
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int j = jlist[jj] & NEIGHMASK;
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
if (rsq < cut_coulsq) {
trsq[ej]=rsq;
tdelx[ej]=delx;
tdely[ej]=dely;
tdelz[ej]=delz;
tjtype[ej]=x[j].w;
tj[ej]=jlist[jj];
ej++;
}
}
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
sv0, sv1, sv2, sv3, sv4, sv5)
#endif
for (int jj = 0; jj < ej; jj++) {
flt_t forcecoul, forcelj, evdwl;
forcecoul = forcelj = evdwl = (flt_t)0.0;
const int j = tj[jj] & NEIGHMASK;
const int sbindex = tj[jj] >> SBBITS & 3;
const flt_t rsq = trsq[jj];
const flt_t r2inv = (flt_t)1.0 / rsq;
const flt_t r_inv = (flt_t)1.0 / sqrt(rsq);
forcecoul = qqrd2e * qtmp * q[j] * r_inv;
if (rsq > cut_coul_innersq) {
const flt_t ccr = cut_coulsq - rsq;
const flt_t switch1 = ccr * ccr * inv_denom_coul *
(cut_coulsq + (flt_t)2.0 * rsq - (flt_t)3.0 * cut_coul_innersq);
forcecoul *= switch1;
}
#ifdef INTEL_VMASK
if (rsq < cut_ljsq) {
#endif
const int jtype = tjtype[jj];
flt_t r6inv = r2inv * r2inv * r2inv;
forcelj = r6inv * (lji[jtype].x * r6inv - lji[jtype].y);
if (EFLAG) evdwl = r6inv*(lji[jtype].z * r6inv - lji[jtype].w);
#ifdef INTEL_VMASK
if (rsq > cut_lj_innersq) {
#endif
const flt_t drsq = cut_ljsq - rsq;
const flt_t cut2 = (rsq - cut_lj_innersq) * drsq;
const flt_t switch1 = drsq * (drsq * drsq + (flt_t)3.0 * cut2) *
inv_denom_lj;
const flt_t switch2 = (flt_t)12.0 * rsq * cut2 * inv_denom_lj;
if (EFLAG) {
#ifndef INTEL_VMASK
if (rsq > cut_lj_innersq) {
#endif
forcelj = forcelj * switch1 + evdwl * switch2;
evdwl *= switch1;
#ifndef INTEL_VMASK
}
#endif
} else {
const flt_t philj = r6inv * (lji[jtype].z*r6inv -
lji[jtype].w);
#ifndef INTEL_VMASK
if (rsq > cut_lj_innersq)
#endif
forcelj = forcelj * switch1 + philj * switch2;
}
#ifdef INTEL_VMASK
}
#endif
#ifdef INTEL_VMASK
}
#else
if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; }
#endif
if (sbindex) {
const flt_t factor_coul = special_coul[sbindex];
forcecoul *= factor_coul;
const flt_t factor_lj = special_lj[sbindex];
forcelj *= factor_lj;
if (EFLAG) evdwl *= factor_lj;
}
const flt_t fpair = (forcecoul + forcelj) * r2inv;
const flt_t fpx = fpair * tdelx[jj];
fxtmp += fpx;
if (NEWTON_PAIR) f[j].x -= fpx;
const flt_t fpy = fpair * tdely[jj];
fytmp += fpy;
if (NEWTON_PAIR) f[j].y -= fpy;
const flt_t fpz = fpair * tdelz[jj];
fztmp += fpz;
if (NEWTON_PAIR) f[j].z -= fpz;
if (EFLAG) {
sevdwl += evdwl;
secoul += forcecoul;
if (eatom) {
fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * forcecoul;
if (NEWTON_PAIR)
f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * forcecoul;
}
}
if (NEWTON_PAIR == 0)
IP_PRE_ev_tally_nborv(vflag, tdelx[jj], tdely[jj], tdelz[jj],
fpx, fpy, fpz);
} // for jj
if (NEWTON_PAIR) {
f[i].x += fxtmp;
f[i].y += fytmp;
f[i].z += fztmp;
} else {
f[i].x = fxtmp;
f[i].y = fytmp;
f[i].z = fztmp;
}
IP_PRE_ev_tally_atomq(NEWTON_PAIR, EFLAG, vflag, f, fwtmp);
} // for ii
IP_PRE_fdotr_reduce_omp(NEWTON_PAIR, nall, minlocal, nthreads, f_start,
f_stride, x, offload, vflag, ov0, ov1, ov2, ov3,
ov4, ov5);
} // end of omp parallel region
IP_PRE_fdotr_reduce(NEWTON_PAIR, nall, nthreads, f_stride, vflag,
ov0, ov1, ov2, ov3, ov4, ov5);
if (EFLAG) {
if (NEWTON_PAIR == 0) {
oevdwl *= (acc_t)0.5;
oecoul *= (acc_t)0.5;
}
ev_global[0] = oevdwl;
ev_global[1] = oecoul;
}
if (vflag) {
if (NEWTON_PAIR == 0) {
ov0 *= (acc_t)0.5;
ov1 *= (acc_t)0.5;
ov2 *= (acc_t)0.5;
ov3 *= (acc_t)0.5;
ov4 *= (acc_t)0.5;
ov5 *= (acc_t)0.5;
}
ev_global[2] = ov0;
ev_global[3] = ov1;
ev_global[4] = ov2;
ev_global[5] = ov3;
ev_global[6] = ov4;
ev_global[7] = ov5;
}
#if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
*timer_compute = MIC_Wtime() - *timer_compute;
#endif
} // end of offload region
if (offload)
fix->stop_watch(TIME_OFFLOAD_LATENCY);
else
fix->stop_watch(TIME_HOST_PAIR);
if (EFLAG || vflag)
fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag);
else
fix->add_result_array(f_start, 0, offload);
}
/* ---------------------------------------------------------------------- */
void PairLJCharmmCoulCharmmIntel::init_style()
{
PairLJCharmmCoulCharmm::init_style();
if (force->newton_pair == 0) {
neighbor->requests[neighbor->nrequest-1]->half = 0;
neighbor->requests[neighbor->nrequest-1]->full = 1;
}
neighbor->requests[neighbor->nrequest-1]->intel = 1;
int ifix = modify->find_fix("package_intel");
if (ifix < 0)
error->all(FLERR,
"The 'package intel' command is required for /intel styles");
fix = static_cast<FixIntel *>(modify->fix[ifix]);
fix->pair_init_check();
#ifdef _LMP_INTEL_OFFLOAD
_cop = fix->coprocessor_number();
#endif
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
pack_force_const(force_const_single, fix->get_mixed_buffers());
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
pack_force_const(force_const_double, fix->get_double_buffers());
else
pack_force_const(force_const_single, fix->get_single_buffers());
}
template <class flt_t, class acc_t>
void PairLJCharmmCoulCharmmIntel::pack_force_const(ForceConst<flt_t> &fc,
IntelBuffers<flt_t,acc_t> *buffers)
{
int off_ccache = 0;
#ifdef _LMP_INTEL_OFFLOAD
if (_cop >= 0) off_ccache = 1;
#endif
buffers->grow_ccache(off_ccache, comm->nthreads, 1);
_ccache_stride = buffers->ccache_stride();
int tp1 = atom->ntypes + 1;
fc.set_ntypes(tp1, memory, _cop);
buffers->set_ntypes(tp1);
flt_t **cutneighsq = buffers->get_cutneighsq();
// Repeat cutsq calculation because done after call to init_style
double cut, cutneigh;
if (cut_lj > cut_coul)
error->all(FLERR,
"Intel varient of lj/charmm/coul/long expects lj cutoff<=coulombic");
for (int i = 1; i <= atom->ntypes; i++) {
for (int j = i; j <= atom->ntypes; j++) {
if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
cut = init_one(i, j);
cutneigh = cut + neighbor->skin;
cutsq[i][j] = cutsq[j][i] = cut*cut;
cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
}
}
}
cut_coul_innersq = cut_coul_inner * cut_coul_inner;
cut_lj_innersq = cut_lj_inner * cut_lj_inner;
cut_ljsq = cut_lj * cut_lj;
cut_coulsq = cut_coul * cut_coul;
cut_bothsq = MAX(cut_ljsq, cut_coulsq);
fc.cut_coulsq = cut_coulsq;
fc.cut_ljsq = cut_ljsq;
fc.cut_coul_innersq = cut_coul_innersq;
fc.cut_lj_innersq = cut_lj_innersq;
for (int i = 0; i < 4; i++) {
fc.special_lj[i] = force->special_lj[i];
fc.special_coul[i] = force->special_coul[i];
fc.special_coul[0] = 1.0;
fc.special_lj[0] = 1.0;
}
for (int i = 0; i < tp1; i++) {
for (int j = 0; j < tp1; j++) {
fc.lj[i][j].x = lj1[i][j];
fc.lj[i][j].y = lj2[i][j];
fc.lj[i][j].z = lj3[i][j];
fc.lj[i][j].w = lj4[i][j];
fc.cutsq[i][j] = cutsq[i][j];
}
}
#ifdef _LMP_INTEL_OFFLOAD
if (_cop < 0) return;
flt_t * special_lj = fc.special_lj;
flt_t * special_coul = fc.special_coul;
flt_t * cutsq = fc.cutsq[0];
LJ_T * lj = fc.lj[0];
flt_t * ocutneighsq = cutneighsq[0];
int tp1sq = tp1 * tp1;
#pragma offload_transfer target(mic:_cop) \
in(special_lj, special_coul: length(4) alloc_if(0) free_if(0)) \
in(cutsq,lj: length(tp1sq) alloc_if(0) free_if(0)) \
in(ocutneighsq: length(tp1sq) alloc_if(0) free_if(0))
#endif
}
/* ---------------------------------------------------------------------- */
template <class flt_t>
void PairLJCharmmCoulCharmmIntel::ForceConst<flt_t>::set_ntypes(
const int ntypes, Memory *memory, const int cop) {
if (ntypes != _ntypes) {
if (_ntypes > 0) {
#ifdef _LMP_INTEL_OFFLOAD
flt_t * ospecial_lj = special_lj;
flt_t * ospecial_coul = special_coul;
flt_t * ocutsq = cutsq[0];
typename IntelBuffers<flt_t,flt_t>::vec4_t * olj = lj[0];
if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL &&
ospecial_coul != NULL && cop >= 0) {
#pragma offload_transfer target(mic:cop) \
nocopy(ospecial_lj, ospecial_coul: alloc_if(0) free_if(1)) \
nocopy(ocutsq, olj: alloc_if(0) free_if(1))
}
#endif
_memory->destroy(cutsq);
_memory->destroy(lj);
}
if (ntypes > 0) {
_cop = cop;
memory->create(cutsq,ntypes,ntypes,"fc.cutsq");
memory->create(lj,ntypes,ntypes,"fc.lj");
#ifdef _LMP_INTEL_OFFLOAD
flt_t * ospecial_lj = special_lj;
flt_t * ospecial_coul = special_coul;
flt_t * ocutsq = cutsq[0];
typename IntelBuffers<flt_t,flt_t>::vec4_t * olj = lj[0];
int tp1sq = ntypes*ntypes;
if (ospecial_lj != NULL && ocutsq != NULL && olj != NULL &&
ospecial_coul != NULL && cop >= 0) {
#pragma offload_transfer target(mic:cop) \
nocopy(ospecial_lj: length(4) alloc_if(1) free_if(0)) \
nocopy(ospecial_coul: length(4) alloc_if(1) free_if(0)) \
nocopy(ocutsq,olj: length(tp1sq) alloc_if(1) free_if(0))
}
#endif
}
}
_ntypes=ntypes;
_memory=memory;
}

View File

@ -0,0 +1,100 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: W. Michael Brown (Intel)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/charmm/coul/charmm/intel,PairLJCharmmCoulCharmmIntel)
#else
#ifndef LMP_PAIR_LJ_CHARMM_COUL_CHARMM_INTEL_H
#define LMP_PAIR_LJ_CHARMM_COUL_CHARMM_INTEL_H
#include "pair_lj_charmm_coul_charmm.h"
#include "fix_intel.h"
namespace LAMMPS_NS {
class PairLJCharmmCoulCharmmIntel : public PairLJCharmmCoulCharmm {
public:
PairLJCharmmCoulCharmmIntel(class LAMMPS *);
virtual ~PairLJCharmmCoulCharmmIntel();
virtual void compute(int, int);
void init_style();
typedef struct { float x,y,z; int w; } sng4_t;
private:
FixIntel *fix;
int _cop, _ccache_stride;
template <class flt_t> class ForceConst;
template <class flt_t, class acc_t>
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
const ForceConst<flt_t> &fc);
template <int EFLAG, int NEWTON_PAIR, class flt_t, class acc_t>
void eval(const int offload, const int vflag,
IntelBuffers<flt_t,acc_t> * buffers,
const ForceConst<flt_t> &fc, const int astart, const int aend);
template <class flt_t, class acc_t>
void pack_force_const(ForceConst<flt_t> &fc,
IntelBuffers<flt_t, acc_t> *buffers);
// ----------------------------------------------------------------------
template <class flt_t>
class ForceConst {
public:
_alignvar(flt_t special_coul[4],64);
_alignvar(flt_t special_lj[4],64);
flt_t **cutsq;
flt_t cut_coulsq, cut_ljsq;
flt_t cut_coul_innersq, cut_lj_innersq;
typename IntelBuffers<flt_t,flt_t>::vec4_t **lj;
ForceConst() : _ntypes(0) {}
~ForceConst() { set_ntypes(0,NULL,_cop); }
void set_ntypes(const int ntypes, Memory *memory, const int cop);
private:
int _ntypes, _cop;
Memory *_memory;
};
ForceConst<float> force_const_single;
ForceConst<double> force_const_double;
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: The 'package intel' command is required for /intel styles
Self-explanatory.
E: Intel varient of lj/charmm/coul/charmm expects lj cutoff<=coulombic
The intel accelerated version of the CHARMM style requires that the
Lennard-Jones cutoff is not greater than the coulombic cutoff.
*/

View File

@ -0,0 +1,42 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Markus Hohnerbach (RWTH)
------------------------------------------------------------------------- */
#include "pair_rebo_intel.h"
#include "error.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
PairREBOIntel::PairREBOIntel(LAMMPS *lmp) : PairAIREBOIntel(lmp) {}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
void PairREBOIntel::settings(int narg, char **arg)
{
if (narg != 0) error->all(FLERR,"Illegal pair_style command");
cutlj = 0.0;
ljflag = torflag = 0;
//
// this one parameter for C-C interactions is different in REBO vs AIREBO
// see Favata, Micheletti, Ryu, Pugno, Comp Phys Comm (2016)
PCCf_2_0 = 0.0;
}

View File

@ -0,0 +1,40 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Markus Hohnerbach (RWTH)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(rebo/intel,PairREBOIntel)
#else
#ifndef LMP_PAIR_REBO_INTEL_H
#define LMP_PAIR_REBO_INTEL_H
#include "pair_airebo_intel.h"
namespace LAMMPS_NS {
class PairREBOIntel : public PairAIREBOIntel {
public:
PairREBOIntel(class LAMMPS *);
virtual void settings(int, char **);
};
}
#endif
#endif

View File

@ -345,16 +345,17 @@ void PairSWIntel::eval(const int offload, const int vflag,
if (jj < jnumhalf) ejnumhalf++; if (jj < jnumhalf) ejnumhalf++;
} }
} }
int ejnum_pad = ejnum;
while ( (ejnum_pad % pad_width) != 0) { int ejrem = ejnum & (pad_width - 1);
tdelx[ejnum_pad] = (flt_t)0.0; if (ejrem) ejrem = pad_width - ejrem;
tdely[ejnum_pad] = (flt_t)0.0; const int ejnum_pad = ejnum + ejrem;
tdelz[ejnum_pad] = (flt_t)0.0; for (int jj = ejnum; jj < ejnum_pad; jj++) {
trsq[ejnum_pad] = p2[3].cutsq + (flt_t)1.0; tdelx[jj] = (flt_t)0.0;
tj[ejnum_pad] = nall; tdely[jj] = (flt_t)0.0;
if (!ONETYPE) tjtype[ejnum_pad] = 0; tdelz[jj] = (flt_t)0.0;
ejnum_pad++; trsq[jj] = p2[3].cutsq + (flt_t)1.0;
tj[jj] = nall;
if (!ONETYPE) tjtype[jj] = 0;
} }
#if defined(LMP_SIMD_COMPILER) #if defined(LMP_SIMD_COMPILER)