Merge remote-tracking branch 'lammps-rw/clean-up-docs-for-sphinx' into clean-up-docs-for-sphinx

This commit is contained in:
Axel Kohlmeyer 2016-09-06 19:54:35 -04:00
commit ace5dc3c7c
24 changed files with 5383 additions and 60 deletions

View File

@ -302,6 +302,11 @@ sphere is determined by the <em>bflag1</em> parameter for the <em>body</em> keyw
The <em>bflag2</em> argument is ignored.</p>
<hr class="docutils" />
<p><strong>Specifics of body style rounded/polygon:</strong></p>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">Aug 2016 - This body style has not yet been added to LAMMPS.
The info below is a placeholder.</p>
</div>
<p>The <em>rounded/polygon</em> body style represents body particles as a convex
polygon with a variable number N &gt; 2 of vertices, which can only be
used for 2d models. One example use of this body style is for 2d
@ -359,8 +364,9 @@ particles whose edge length is sqrt(2):</p>
<span class="mf">1.0</span>
</pre></div>
</div>
<p>The <span class="xref doc">pair_style body/rounded/polygon</span> command
can be used with this body style to compute body/body interactions.</p>
<p>The <span class="xref doc">pair_style body/rounded/polygon</span>
command can be used with this body style to compute body/body
interactions.</p>
<p>For output purposes via the <a class="reference internal" href="compute_body_local.html"><span class="doc">compute body/local</span></a> and <a class="reference internal" href="dump.html"><span class="doc">dump local</span></a>
commands, this body style produces one datum for each of the N
sub-particles in a body particle. The datum has 3 values:</p>

View File

@ -481,6 +481,11 @@ change this via the <a class="reference internal" href="dump_modify.html"><span
<p>The <em>fix</em> keyword can be used with a <a class="reference internal" href="fix.html"><span class="doc">fix</span></a> that produces
objects to be drawn. An example is the <span class="xref doc">fix surface/global</span> command which can draw lines
or triangles for 2d/3d simulations.</p>
<div class="admonition note">
<p class="first admonition-title">Note</p>
<p class="last">Aug 2016 - The fix surface/global command is not yet added to
LAMMPS.</p>
</div>
<p>The <em>fflag1</em> and <em>fflag2</em> settings are numerical values which are
passed to the fix to affect how the drawing of its objects is done.
See the individual fix doc page for a description of what these

View File

@ -132,7 +132,7 @@
</pre></div>
</div>
<ul class="simple">
<li>style = <em>none</em> or <em>ewald</em> or <em>ewald/disp</em> or <em>ewald/omp</em> or <em>pppm</em> or <em>pppm/cg</em> or <em>pppm/disp</em> or <em>pppm/tip4p</em> or <em>pppm/stagger</em> or <em>pppm/disp/tip4p</em> or <em>pppm/gpu</em> or <em>pppm/omp</em> or <em>pppm/cg/omp</em> or <em>pppm/tip4p/omp</em> or <em>msm</em> or <em>msm/cg</em> or <em>msm/omp</em> or <em>msm/cg/omp</em></li>
<li>style = <em>none</em> or <em>ewald</em> or <em>ewald/disp</em> or <em>ewald/omp</em> or <em>pppm</em> or <em>pppm/cg</em> or <em>pppm/disp</em> or <em>pppm/tip4p</em> or <em>pppm/stagger</em> or <em>pppm/disp/tip4p</em> or <em>pppm/gpu</em> or <em>pppm/kk</em> or <em>pppm/omp</em> or <em>pppm/cg/omp</em> or <em>pppm/tip4p/omp</em> or <em>msm</em> or <em>msm/cg</em> or <em>msm/omp</em> or <em>msm/cg/omp</em></li>
</ul>
<pre class="literal-block">
<em>none</em> value = none
@ -155,6 +155,8 @@
accuracy = desired relative error in forces
<em>pppm/gpu</em> value = accuracy
accuracy = desired relative error in forces
<em>pppm/kk</em> value = accuracy
accuracy = desired relative error in forces
<em>pppm/omp</em> value = accuracy
accuracy = desired relative error in forces
<em>pppm/cg/omp</em> value = accuracy
@ -385,6 +387,9 @@ If <em>pppm/gpu</em> is used with a GPU-enabled pair style, part of the PPPM
calculation can be performed concurrently on the GPU while other
calculations for non-bonded and bonded force calculation are performed
on the CPU.</p>
<p>The <em>pppm/kk</em> style also performs charge assignment and force
interpolation calculations on the GPU while the FFTs themselves are
calculated on the CPU in non-threaded mode.</p>
<p>These accelerated styles are part of the GPU, USER-INTEL,
KOKKOS, USER-OMP, and OPT packages respectively. They are only
enabled if LAMMPS was built with those packages. See the <a class="reference internal" href="Section_start.html#start-3"><span class="std std-ref">Making LAMMPS</span></a> section for more info.</p>
@ -406,10 +411,10 @@ the KSPACE package is installed by default.</p>
<p>For MSM, a simulation must be 3d and one can use any combination of
periodic, non-periodic, or shrink-wrapped boundaries (specified using
the <a class="reference internal" href="boundary.html"><span class="doc">boundary</span></a> command).</p>
<p>For Ewald and PPPM, a simulation must be 3d and periodic in all dimensions.
The only exception is if the slab option is set with <a class="reference internal" href="kspace_modify.html"><span class="doc">kspace_modify</span></a>,
in which case the xy dimensions must be periodic and the z dimension must be
non-periodic.</p>
<p>For Ewald and PPPM, a simulation must be 3d and periodic in all
dimensions. The only exception is if the slab option is set with
<a class="reference internal" href="kspace_modify.html"><span class="doc">kspace_modify</span></a>, in which case the xy dimensions
must be periodic and the z dimension must be non-periodic.</p>
</div>
<div class="section" id="related-commands">
<h2>Related commands</h2>
@ -435,9 +440,10 @@ and Computation 5, 2322 (2009)</p>
<p id="veld"><strong>(Veld)</strong> In &#8216;t Veld, Ismail, Grest, J Chem Phys, 127, 144711 (2007).</p>
<p id="toukmaji"><strong>(Toukmaji)</strong> Toukmaji, Sagui, Board, and Darden, J Chem Phys, 113,
10913 (2000).</p>
<p id="isele-holder"><strong>(Isele-Holder)</strong> Isele-Holder, Mitchell, Ismail, J Chem Phys, 137, 174107 (2012).</p>
<p id="isele-holder2"><strong>(Isele-Holder2)</strong> Isele-Holder, Mitchell, Hammond, Kohlmeyer, Ismail, J Chem Theory
Comput 9, 5412 (2013).</p>
<p id="isele-holder"><strong>(Isele-Holder)</strong> Isele-Holder, Mitchell, Ismail, J Chem Phys, 137,
174107 (2012).</p>
<p id="isele-holder2"><strong>(Isele-Holder2)</strong> Isele-Holder, Mitchell, Hammond, Kohlmeyer, Ismail,
J Chem Theory Comput 9, 5412 (2013).</p>
<p id="hardy"><strong>(Hardy)</strong> David Hardy thesis: Multilevel Summation for the Fast
Evaluation of Forces for the Simulation of Biomolecules, University of
Illinois at Urbana-Champaign, (2006).</p>

View File

@ -175,6 +175,9 @@ The {bflag2} argument is ignored.
[Specifics of body style rounded/polygon:]
NOTE: Aug 2016 - This body style has not yet been added to LAMMPS.
The info below is a placeholder.
The {rounded/polygon} body style represents body particles as a convex
polygon with a variable number N > 2 of vertices, which can only be
used for 2d models. One example use of this body style is for 2d
@ -235,8 +238,9 @@ particles whose edge length is sqrt(2):
0 1 1 2 2 3 3 0
1.0 :pre
The "pair_style body/rounded/polygon"_pair_body_rounded_polygon.html command
can be used with this body style to compute body/body interactions.
The "pair_style body/rounded/polygon"_pair_body_rounded_polygon.html
command can be used with this body style to compute body/body
interactions.
For output purposes via the "compute
body/local"_compute_body_local.html and "dump local"_dump.html

View File

@ -388,6 +388,9 @@ objects to be drawn. An example is the "fix
surface/global"_fix_surface_global.html command which can draw lines
or triangles for 2d/3d simulations.
NOTE: Aug 2016 - The fix surface/global command is not yet added to
LAMMPS.
The {fflag1} and {fflag2} settings are numerical values which are
passed to the fix to affect how the drawing of its objects is done.
See the individual fix doc page for a description of what these

View File

@ -12,7 +12,7 @@ kspace_style command :h3
kspace_style style value :pre
style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg} or {pppm/disp} or {pppm/tip4p} or {pppm/stagger} or {pppm/disp/tip4p} or {pppm/gpu} or {pppm/omp} or {pppm/cg/omp} or {pppm/tip4p/omp} or {msm} or {msm/cg} or {msm/omp} or {msm/cg/omp} :ulb,l
style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg} or {pppm/disp} or {pppm/tip4p} or {pppm/stagger} or {pppm/disp/tip4p} or {pppm/gpu} or {pppm/kk} or {pppm/omp} or {pppm/cg/omp} or {pppm/tip4p/omp} or {msm} or {msm/cg} or {msm/omp} or {msm/cg/omp} :ulb,l
{none} value = none
{ewald} value = accuracy
accuracy = desired relative error in forces
@ -33,6 +33,8 @@ style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg}
accuracy = desired relative error in forces
{pppm/gpu} value = accuracy
accuracy = desired relative error in forces
{pppm/kk} value = accuracy
accuracy = desired relative error in forces
{pppm/omp} value = accuracy
accuracy = desired relative error in forces
{pppm/cg/omp} value = accuracy
@ -271,6 +273,10 @@ calculation can be performed concurrently on the GPU while other
calculations for non-bonded and bonded force calculation are performed
on the CPU.
The {pppm/kk} style also performs charge assignment and force
interpolation calculations on the GPU while the FFTs themselves are
calculated on the CPU in non-threaded mode.
These accelerated styles are part of the GPU, USER-INTEL,
KOKKOS, USER-OMP, and OPT packages respectively. They are only
enabled if LAMMPS was built with those packages. See the "Making
@ -299,10 +305,10 @@ For MSM, a simulation must be 3d and one can use any combination of
periodic, non-periodic, or shrink-wrapped boundaries (specified using
the "boundary"_boundary.html command).
For Ewald and PPPM, a simulation must be 3d and periodic in all dimensions.
The only exception is if the slab option is set with "kspace_modify"_kspace_modify.html,
in which case the xy dimensions must be periodic and the z dimension must be
non-periodic.
For Ewald and PPPM, a simulation must be 3d and periodic in all
dimensions. The only exception is if the slab option is set with
"kspace_modify"_kspace_modify.html, in which case the xy dimensions
must be periodic and the z dimension must be non-periodic.
[Related commands:]
@ -354,11 +360,12 @@ and Computation 5, 2322 (2009)
10913 (2000).
:link(Isele-Holder)
[(Isele-Holder)] Isele-Holder, Mitchell, Ismail, J Chem Phys, 137, 174107 (2012).
[(Isele-Holder)] Isele-Holder, Mitchell, Ismail, J Chem Phys, 137,
174107 (2012).
:link(Isele-Holder2)
[(Isele-Holder2)] Isele-Holder, Mitchell, Hammond, Kohlmeyer, Ismail, J Chem Theory
Comput 9, 5412 (2013).
[(Isele-Holder2)] Isele-Holder, Mitchell, Hammond, Kohlmeyer, Ismail,
J Chem Theory Comput 9, 5412 (2013).
:link(Hardy)
[(Hardy)] David Hardy thesis: Multilevel Summation for the Fast

View File

@ -87,6 +87,8 @@ action fix_setforce_kokkos.cpp
action fix_setforce_kokkos.h
action fix_wall_reflect_kokkos.cpp
action fix_wall_reflect_kokkos.h
action gridcomm_kokkos.cpp gridcomm.cpp
action gridcomm_kokkos.h gridcomm.h
action improper_harmonic_kokkos.cpp improper_harmonic.cpp
action improper_harmonic_kokkos.h improper_harmonic.h
action kokkos.cpp
@ -102,6 +104,8 @@ action neigh_list_kokkos.cpp
action neigh_list_kokkos.h
action neighbor_kokkos.cpp
action neighbor_kokkos.h
action math_special_kokkos.cpp
action math_special_kokkos.h
action pair_buck_coul_cut_kokkos.cpp
action pair_buck_coul_cut_kokkos.h
action pair_buck_coul_long_kokkos.cpp pair_buck_coul_long.cpp
@ -167,6 +171,8 @@ action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp
action pair_tersoff_mod_kokkos.h pair_tersoff_mod.h
action pair_tersoff_zbl_kokkos.cpp pair_tersoff_zbl.cpp
action pair_tersoff_zbl_kokkos.h pair_tersoff_zbl.h
action pppm_kokkos.cpp pppm.cpp
action pppm_kokkos.h pppm.h
action region_block_kokkos.cpp
action region_block_kokkos.h
action verlet_kokkos.cpp

View File

@ -0,0 +1,614 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <mpi.h>
#include "gridcomm_kokkos.h"
#include "comm.h"
#include "kspace.h"
#include "memory.h"
#include "error.h"
using namespace LAMMPS_NS;
#define SWAPDELTA 8
/* ---------------------------------------------------------------------- */
template<class DeviceType>
GridCommKokkos<DeviceType>::GridCommKokkos(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi,
int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi,
int pxlo, int pxhi, int pylo, int pyhi, int pzlo, int pzhi)
: Pointers(lmp)
{
gridcomm = gcomm;
MPI_Comm_rank(gridcomm,&me);
nforward = forward;
nreverse = reverse;
inxlo = ixlo;
inxhi = ixhi;
inylo = iylo;
inyhi = iyhi;
inzlo = izlo;
inzhi = izhi;
outxlo = oxlo;
outxhi = oxhi;
outylo = oylo;
outyhi = oyhi;
outzlo = ozlo;
outzhi = ozhi;
outxlo_max = oxlo;
outxhi_max = oxhi;
outylo_max = oylo;
outyhi_max = oyhi;
outzlo_max = ozlo;
outzhi_max = ozhi;
procxlo = pxlo;
procxhi = pxhi;
procylo = pylo;
procyhi = pyhi;
proczlo = pzlo;
proczhi = pzhi;
nswap = 0;
swap = NULL;
//buf1 = buf2 = NULL;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
GridCommKokkos<DeviceType>::GridCommKokkos(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi,
int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi,
int oxlo_max, int oxhi_max, int oylo_max, int oyhi_max,
int ozlo_max, int ozhi_max,
int pxlo, int pxhi, int pylo, int pyhi, int pzlo, int pzhi)
: Pointers(lmp)
{
gridcomm = gcomm;
MPI_Comm_rank(gridcomm,&me);
nforward = forward;
nreverse = reverse;
inxlo = ixlo;
inxhi = ixhi;
inylo = iylo;
inyhi = iyhi;
inzlo = izlo;
inzhi = izhi;
outxlo = oxlo;
outxhi = oxhi;
outylo = oylo;
outyhi = oyhi;
outzlo = ozlo;
outzhi = ozhi;
outxlo_max = oxlo_max;
outxhi_max = oxhi_max;
outylo_max = oylo_max;
outyhi_max = oyhi_max;
outzlo_max = ozlo_max;
outzhi_max = ozhi_max;
procxlo = pxlo;
procxhi = pxhi;
procylo = pylo;
procyhi = pyhi;
proczlo = pzlo;
proczhi = pzhi;
nswap = 0;
swap = NULL;
//buf1 = buf2 = NULL;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
GridCommKokkos<DeviceType>::~GridCommKokkos()
{
for (int i = 0; i < nswap; i++) {
//memory->destroy_kokkos(swap[i].k_packlist,swap[i].packlist);
//memory->destroy_kokkos(swap[i].k_unpacklist,swap[i].unpacklist);
}
memory->sfree(swap);
//memory->destroy(buf1);
//memory->destroy(buf2);
}
/* ----------------------------------------------------------------------
notify 6 neighbor procs how many ghost grid planes I need from them
ghostxlo = # of lower grid planes I own that are needed from me
by procxlo to become its upper ghost planes
ghostxhi = # of upper grid planes I own that are needed from me
by procxhi to become its lower ghost planes
if no neighbor proc, value is from self
------------------------------------------------------------------------- */
template<class DeviceType>
void GridCommKokkos<DeviceType>::ghost_notify()
{
int nplanes = inxlo - outxlo;
if (procxlo != me)
MPI_Sendrecv(&nplanes,1,MPI_INT,procxlo,0,
&ghostxhi,1,MPI_INT,procxhi,0,gridcomm,MPI_STATUS_IGNORE);
else ghostxhi = nplanes;
nplanes = outxhi - inxhi;
if (procxhi != me)
MPI_Sendrecv(&nplanes,1,MPI_INT,procxhi,0,
&ghostxlo,1,MPI_INT,procxlo,0,gridcomm,MPI_STATUS_IGNORE);
else ghostxlo = nplanes;
nplanes = inylo - outylo;
if (procylo != me)
MPI_Sendrecv(&nplanes,1,MPI_INT,procylo,0,
&ghostyhi,1,MPI_INT,procyhi,0,gridcomm,MPI_STATUS_IGNORE);
else ghostyhi = nplanes;
nplanes = outyhi - inyhi;
if (procyhi != me)
MPI_Sendrecv(&nplanes,1,MPI_INT,procyhi,0,
&ghostylo,1,MPI_INT,procylo,0,gridcomm,MPI_STATUS_IGNORE);
else ghostylo = nplanes;
nplanes = inzlo - outzlo;
if (proczlo != me)
MPI_Sendrecv(&nplanes,1,MPI_INT,proczlo,0,
&ghostzhi,1,MPI_INT,proczhi,0,gridcomm,MPI_STATUS_IGNORE);
else ghostzhi = nplanes;
nplanes = outzhi - inzhi;
if (proczhi != me)
MPI_Sendrecv(&nplanes,1,MPI_INT,proczhi,0,
&ghostzlo,1,MPI_INT,proczlo,0,gridcomm,MPI_STATUS_IGNORE);
else ghostzlo = nplanes;
}
/* ----------------------------------------------------------------------
check if all ghost grid comm needs overlap into non nearest-neighbor proc
if yes, return 1, else return 0
------------------------------------------------------------------------- */
template<class DeviceType>
int GridCommKokkos<DeviceType>::ghost_overlap()
{
int nearest = 0;
if (ghostxlo > inxhi-inxlo+1) nearest = 1;
if (ghostxhi > inxhi-inxlo+1) nearest = 1;
if (ghostylo > inyhi-inylo+1) nearest = 1;
if (ghostyhi > inyhi-inylo+1) nearest = 1;
if (ghostzlo > inzhi-inzlo+1) nearest = 1;
if (ghostzhi > inzhi-inzlo+1) nearest = 1;
int nearest_all;
MPI_Allreduce(&nearest,&nearest_all,1,MPI_INT,MPI_MIN,gridcomm);
return nearest_all;
}
/* ----------------------------------------------------------------------
create swap stencil for grid own/ghost communication
swaps covers all 3 dimensions and both directions
swaps cover multiple iterations in a direction if need grid pts
from further away than nearest-neighbor proc
same swap list used by forward and reverse communication
------------------------------------------------------------------------- */
template<class DeviceType>
void GridCommKokkos<DeviceType>::setup()
{
int nsent,sendfirst,sendlast,recvfirst,recvlast;
int sendplanes,recvplanes;
int notdoneme,notdone;
int maxswap = 6;
swap = (Swap *) memory->smalloc(maxswap*sizeof(Swap),"Commgrid:swap");
k_packlist = DAT::tdual_int_2d("Commgrid:packlist",maxswap,1);
k_unpacklist = DAT::tdual_int_2d("Commgrid:unpacklist",maxswap,1);
nswap = 0;
// send own grid pts to -x processor, recv ghost grid pts from +x processor
nsent = 0;
sendfirst = inxlo;
sendlast = inxhi;
recvfirst = inxhi+1;
notdone = 1;
while (notdone) {
if (nswap == maxswap) {
maxswap += SWAPDELTA;
swap = (Swap *)
memory->srealloc(swap,maxswap*sizeof(Swap),"Commgrid:swap");
k_packlist.resize(maxswap,k_packlist.dimension_1());
k_unpacklist.resize(maxswap,k_unpacklist.dimension_1());
}
swap[nswap].sendproc = procxlo;
swap[nswap].recvproc = procxhi;
sendplanes = MIN(sendlast-sendfirst+1,ghostxlo-nsent);
swap[nswap].npack =
indices(k_packlist,nswap,
sendfirst,sendfirst+sendplanes-1,inylo,inyhi,inzlo,inzhi);
if (procxlo != me)
MPI_Sendrecv(&sendplanes,1,MPI_INT,procxlo,0,
&recvplanes,1,MPI_INT,procxhi,0,gridcomm,MPI_STATUS_IGNORE);
else recvplanes = sendplanes;
swap[nswap].nunpack =
indices(k_unpacklist,nswap,
recvfirst,recvfirst+recvplanes-1,inylo,inyhi,inzlo,inzhi);
nsent += sendplanes;
sendfirst += sendplanes;
sendlast += recvplanes;
recvfirst += recvplanes;
nswap++;
if (nsent < ghostxlo) notdoneme = 1;
else notdoneme = 0;
MPI_Allreduce(&notdoneme,&notdone,1,MPI_INT,MPI_SUM,gridcomm);
}
// send own grid pts to +x processor, recv ghost grid pts from -x processor
nsent = 0;
sendfirst = inxlo;
sendlast = inxhi;
recvlast = inxlo-1;
notdone = 1;
while (notdone) {
if (nswap == maxswap) {
maxswap += 1;
swap = (Swap *)
memory->srealloc(swap,maxswap*sizeof(Swap),"Commgrid:swap");
k_packlist.resize(maxswap,k_packlist.dimension_1());
k_unpacklist.resize(maxswap,k_unpacklist.dimension_1());
}
swap[nswap].sendproc = procxhi;
swap[nswap].recvproc = procxlo;
sendplanes = MIN(sendlast-sendfirst+1,ghostxhi-nsent);
swap[nswap].npack =
indices(k_packlist,nswap,
sendlast-sendplanes+1,sendlast,inylo,inyhi,inzlo,inzhi);
if (procxhi != me)
MPI_Sendrecv(&sendplanes,1,MPI_INT,procxhi,0,
&recvplanes,1,MPI_INT,procxlo,0,gridcomm,MPI_STATUS_IGNORE);
else recvplanes = sendplanes;
swap[nswap].nunpack =
indices(k_unpacklist,nswap,
recvlast-recvplanes+1,recvlast,inylo,inyhi,inzlo,inzhi);
nsent += sendplanes;
sendfirst -= recvplanes;
sendlast -= sendplanes;
recvlast -= recvplanes;
nswap++;
if (nsent < ghostxhi) notdoneme = 1;
else notdoneme = 0;
MPI_Allreduce(&notdoneme,&notdone,1,MPI_INT,MPI_SUM,gridcomm);
}
// send own grid pts to -y processor, recv ghost grid pts from +y processor
nsent = 0;
sendfirst = inylo;
sendlast = inyhi;
recvfirst = inyhi+1;
notdone = 1;
while (notdone) {
if (nswap == maxswap) {
maxswap += SWAPDELTA;
swap = (Swap *)
memory->srealloc(swap,maxswap*sizeof(Swap),"Commgrid:swap");
k_packlist.resize(maxswap,k_packlist.dimension_1());
k_unpacklist.resize(maxswap,k_unpacklist.dimension_1());
}
swap[nswap].sendproc = procylo;
swap[nswap].recvproc = procyhi;
sendplanes = MIN(sendlast-sendfirst+1,ghostylo-nsent);
swap[nswap].npack =
indices(k_packlist,nswap,
outxlo,outxhi,sendfirst,sendfirst+sendplanes-1,inzlo,inzhi);
if (procylo != me)
MPI_Sendrecv(&sendplanes,1,MPI_INT,procylo,0,
&recvplanes,1,MPI_INT,procyhi,0,gridcomm,MPI_STATUS_IGNORE);
else recvplanes = sendplanes;
swap[nswap].nunpack =
indices(k_unpacklist,nswap,
outxlo,outxhi,recvfirst,recvfirst+recvplanes-1,inzlo,inzhi);
nsent += sendplanes;
sendfirst += sendplanes;
sendlast += recvplanes;
recvfirst += recvplanes;
nswap++;
if (nsent < ghostylo) notdoneme = 1;
else notdoneme = 0;
MPI_Allreduce(&notdoneme,&notdone,1,MPI_INT,MPI_SUM,gridcomm);
}
// send own grid pts to +y processor, recv ghost grid pts from -y processor
nsent = 0;
sendfirst = inylo;
sendlast = inyhi;
recvlast = inylo-1;
notdone = 1;
while (notdone) {
if (nswap == maxswap) {
maxswap += 1;
swap = (Swap *)
memory->srealloc(swap,maxswap*sizeof(Swap),"Commgrid:swap");
k_packlist.resize(maxswap,k_packlist.dimension_1());
k_unpacklist.resize(maxswap,k_unpacklist.dimension_1());
}
swap[nswap].sendproc = procyhi;
swap[nswap].recvproc = procylo;
sendplanes = MIN(sendlast-sendfirst+1,ghostyhi-nsent);
swap[nswap].npack =
indices(k_packlist,nswap,
outxlo,outxhi,sendlast-sendplanes+1,sendlast,inzlo,inzhi);
if (procyhi != me)
MPI_Sendrecv(&sendplanes,1,MPI_INT,procyhi,0,
&recvplanes,1,MPI_INT,procylo,0,gridcomm,MPI_STATUS_IGNORE);
else recvplanes = sendplanes;
swap[nswap].nunpack =
indices(k_unpacklist,nswap,
outxlo,outxhi,recvlast-recvplanes+1,recvlast,inzlo,inzhi);
nsent += sendplanes;
sendfirst -= recvplanes;
sendlast -= sendplanes;
recvlast -= recvplanes;
nswap++;
if (nsent < ghostyhi) notdoneme = 1;
else notdoneme = 0;
MPI_Allreduce(&notdoneme,&notdone,1,MPI_INT,MPI_SUM,gridcomm);
}
// send own grid pts to -z processor, recv ghost grid pts from +z processor
nsent = 0;
sendfirst = inzlo;
sendlast = inzhi;
recvfirst = inzhi+1;
notdone = 1;
while (notdone) {
if (nswap == maxswap) {
maxswap += SWAPDELTA;
swap = (Swap *)
memory->srealloc(swap,maxswap*sizeof(Swap),"Commgrid:swap");
k_packlist.resize(maxswap,k_packlist.dimension_1());
k_unpacklist.resize(maxswap,k_unpacklist.dimension_1());
}
swap[nswap].sendproc = proczlo;
swap[nswap].recvproc = proczhi;
sendplanes = MIN(sendlast-sendfirst+1,ghostzlo-nsent);
swap[nswap].npack =
indices(k_packlist,nswap,
outxlo,outxhi,outylo,outyhi,sendfirst,sendfirst+sendplanes-1);
if (proczlo != me)
MPI_Sendrecv(&sendplanes,1,MPI_INT,proczlo,0,
&recvplanes,1,MPI_INT,proczhi,0,gridcomm,MPI_STATUS_IGNORE);
else recvplanes = sendplanes;
swap[nswap].nunpack =
indices(k_unpacklist,nswap,
outxlo,outxhi,outylo,outyhi,recvfirst,recvfirst+recvplanes-1);
nsent += sendplanes;
sendfirst += sendplanes;
sendlast += recvplanes;
recvfirst += recvplanes;
nswap++;
if (nsent < ghostzlo) notdoneme = 1;
else notdoneme = 0;
MPI_Allreduce(&notdoneme,&notdone,1,MPI_INT,MPI_SUM,gridcomm);
}
// send own grid pts to +z processor, recv ghost grid pts from -z processor
nsent = 0;
sendfirst = inzlo;
sendlast = inzhi;
recvlast = inzlo-1;
notdone = 1;
while (notdone) {
if (nswap == maxswap) {
maxswap += 1;
swap = (Swap *)
memory->srealloc(swap,maxswap*sizeof(Swap),"Commgrid:swap");
k_packlist.resize(maxswap,k_packlist.dimension_1());
k_unpacklist.resize(maxswap,k_unpacklist.dimension_1());
}
swap[nswap].sendproc = proczhi;
swap[nswap].recvproc = proczlo;
sendplanes = MIN(sendlast-sendfirst+1,ghostzhi-nsent);
swap[nswap].npack =
indices(k_packlist,nswap,
outxlo,outxhi,outylo,outyhi,sendlast-sendplanes+1,sendlast);
if (proczhi != me)
MPI_Sendrecv(&sendplanes,1,MPI_INT,proczhi,0,
&recvplanes,1,MPI_INT,proczlo,0,gridcomm,MPI_STATUS_IGNORE);
else recvplanes = sendplanes;
swap[nswap].nunpack =
indices(k_unpacklist,nswap,
outxlo,outxhi,outylo,outyhi,recvlast-recvplanes+1,recvlast);
nsent += sendplanes;
sendfirst -= recvplanes;
sendlast -= sendplanes;
recvlast -= recvplanes;
nswap++;
if (nsent < ghostzhi) notdoneme = 1;
else notdoneme = 0;
MPI_Allreduce(&notdoneme,&notdone,1,MPI_INT,MPI_SUM,gridcomm);
}
// nbuf = max of any forward/reverse pack/unpack
nbuf = 0;
for (int i = 0; i < nswap; i++) {
nbuf = MAX(nbuf,swap[i].npack);
nbuf = MAX(nbuf,swap[i].nunpack);
}
nbuf *= MAX(nforward,nreverse);
//memory->create(buf1,nbuf,"Commgrid:buf1");
k_buf1 = DAT::tdual_FFT_SCALAR_1d("Commgrid:buf1",nbuf);
//memory->create(buf2,nbuf,"Commgrid:buf2");
k_buf2 = DAT::tdual_FFT_SCALAR_1d("Commgrid:buf2",nbuf);
}
/* ----------------------------------------------------------------------
use swap list in forward order to acquire copy of all needed ghost grid pts
------------------------------------------------------------------------- */
template<class DeviceType>
void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which)
{
k_packlist.sync<DeviceType>();
k_unpacklist.sync<DeviceType>();
for (int m = 0; m < nswap; m++) {
if (swap[m].sendproc == me)
kspace->pack_forward_kokkos(which,k_buf2,swap[m].npack,k_packlist,m);
else
kspace->pack_forward_kokkos(which,k_buf1,swap[m].npack,k_packlist,m);
if (swap[m].sendproc != me) {
MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nforward*swap[m].nunpack,MPI_FFT_SCALAR,
swap[m].recvproc,0,gridcomm,&request);
MPI_Send(k_buf1.view<DeviceType>().ptr_on_device(),nforward*swap[m].npack,MPI_FFT_SCALAR,
swap[m].sendproc,0,gridcomm);
MPI_Wait(&request,MPI_STATUS_IGNORE);
}
kspace->unpack_forward_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m);
}
}
/* ----------------------------------------------------------------------
use swap list in reverse order to compute fully summed value
for each owned grid pt that some other proc has copy of as a ghost grid pt
------------------------------------------------------------------------- */
template<class DeviceType>
void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which)
{
k_packlist.sync<DeviceType>();
k_unpacklist.sync<DeviceType>();
for (int m = nswap-1; m >= 0; m--) {
if (swap[m].recvproc == me)
kspace->pack_reverse_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m);
else
kspace->pack_reverse_kokkos(which,k_buf1,swap[m].nunpack,k_unpacklist,m);
if (swap[m].recvproc != me) {
MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nreverse*swap[m].npack,MPI_FFT_SCALAR,
swap[m].sendproc,0,gridcomm,&request);
MPI_Send(k_buf1.view<DeviceType>().ptr_on_device(),nreverse*swap[m].nunpack,MPI_FFT_SCALAR,
swap[m].recvproc,0,gridcomm);
MPI_Wait(&request,MPI_STATUS_IGNORE);
}
kspace->unpack_reverse_kokkos(which,k_buf2,swap[m].npack,k_packlist,m);
}
}
/* ----------------------------------------------------------------------
create 1d list of offsets into 3d array section (xlo:xhi,ylo:yhi,zlo:zhi)
assume 3d array is allocated as (0:outxhi_max-outxlo_max+1,0:outyhi_max-outylo_max+1,
0:outzhi_max-outzlo_max+1)
------------------------------------------------------------------------- */
template<class DeviceType>
int GridCommKokkos<DeviceType>::indices(DAT::tdual_int_2d &k_list, int index,
int xlo, int xhi, int ylo, int yhi, int zlo, int zhi)
{
int nmax = (xhi-xlo+1) * (yhi-ylo+1) * (zhi-zlo+1);
if (k_list.dimension_1() < nmax)
k_list.resize(k_list.dimension_0(),nmax);
int nx = (outxhi_max-outxlo_max+1);
int ny = (outyhi_max-outylo_max+1);
k_list.sync<LMPHostType>();
int n = 0;
int ix,iy,iz;
for (iz = zlo; iz <= zhi; iz++)
for (iy = ylo; iy <= yhi; iy++)
for (ix = xlo; ix <= xhi; ix++)
k_list.h_view(index,n++) = (iz-outzlo_max)*ny*nx + (iy-outylo_max)*nx + (ix-outxlo_max);
k_list.modify<LMPHostType>();
k_list.sync<DeviceType>();
return nmax;
}
/* ----------------------------------------------------------------------
memory usage of send/recv bufs
------------------------------------------------------------------------- */
template<class DeviceType>
double GridCommKokkos<DeviceType>::memory_usage()
{
double bytes = 2*nbuf * sizeof(double);
return bytes;
}
namespace LAMMPS_NS {
template class GridCommKokkos<LMPDeviceType>;
#ifdef KOKKOS_HAVE_CUDA
template class GridCommKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,96 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_GRIDCOMM_KOKKOS_H
#define LMP_GRIDCOMM_KOKKOS_H
#include "pointers.h"
#include "kokkos_type.h"
#ifdef FFT_SINGLE
typedef float FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_FLOAT
#else
typedef double FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_DOUBLE
#endif
namespace LAMMPS_NS {
template<class DeviceType>
class GridCommKokkos : protected Pointers {
public:
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
GridCommKokkos(class LAMMPS *, MPI_Comm, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
GridCommKokkos(class LAMMPS *, MPI_Comm, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int,
int, int, int, int, int, int);
~GridCommKokkos();
void ghost_notify();
int ghost_overlap();
void setup();
void forward_comm(class KSpace *, int);
void reverse_comm(class KSpace *, int);
double memory_usage();
private:
int me;
int nforward,nreverse;
MPI_Comm gridcomm;
MPI_Request request;
// in = inclusive indices of 3d grid chunk that I own
// out = inclusive indices of 3d grid chunk I own plus ghosts I use
// proc = 6 neighbor procs that surround me
// ghost = # of my owned grid planes needed from me
// by each of 6 neighbor procs to become their ghost planes
int inxlo,inxhi,inylo,inyhi,inzlo,inzhi;
int outxlo,outxhi,outylo,outyhi,outzlo,outzhi;
int outxlo_max,outxhi_max,outylo_max,outyhi_max,outzlo_max,outzhi_max;
int procxlo,procxhi,procylo,procyhi,proczlo,proczhi;
int ghostxlo,ghostxhi,ghostylo,ghostyhi,ghostzlo,ghostzhi;
int nbuf;
//FFT_SCALAR *buf1,*buf2;
DAT::tdual_FFT_SCALAR_1d k_buf1;
DAT::tdual_FFT_SCALAR_1d k_buf2;
struct Swap {
int sendproc; // proc to send to for forward comm
int recvproc; // proc to recv from for forward comm
int npack; // # of datums to pack
int nunpack; // # of datums to unpack
//int *packlist; // 3d array offsets to pack
//int *unpacklist; // 3d array offsets to unpack
};
DAT::tdual_int_2d k_packlist;
DAT::tdual_int_2d k_unpacklist;
int nswap;
Swap *swap;
int indices(DAT::tdual_int_2d &, int, int, int, int, int, int, int);
};
}
#endif

View File

@ -14,6 +14,8 @@
#ifndef LMP_LMPTYPE_KOKKOS_H
#define LMP_LMPTYPE_KOKKOS_H
#include "lmptype.h"
#include <Kokkos_Core.hpp>
#include <Kokkos_DualView.hpp>
#include <impl/Kokkos_Timer.hpp>
@ -24,6 +26,21 @@
#define ISFINITE(x) std::isfinite(x)
#endif
// User-settable FFT precision
// FFT_PRECISION = 1 is single-precision complex (4-byte real, 4-byte imag)
// FFT_PRECISION = 2 is double-precision complex (8-byte real, 8-byte imag)
#ifdef FFT_SINGLE
#define FFT_PRECISION 1
#define MPI_FFT_SCALAR MPI_FLOAT
typedef float FFT_SCALAR;
#else
#define FFT_PRECISION 2
#define MPI_FFT_SCALAR MPI_DOUBLE
typedef double FFT_SCALAR;
#endif
#define MAX_TYPES_STACKPARAMS 12
#define NeighClusterSize 8
@ -567,6 +584,32 @@ typedef tdual_neighbors_2d::t_dev_um t_neighbors_2d_um;
typedef tdual_neighbors_2d::t_dev_const_um t_neighbors_2d_const_um;
typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread;
//Kspace
typedef Kokkos::
DualView<FFT_SCALAR*, Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_SCALAR_1d;
typedef tdual_FFT_SCALAR_1d::t_dev t_FFT_SCALAR_1d;
typedef tdual_FFT_SCALAR_1d::t_dev_um t_FFT_SCALAR_1d_um;
typedef Kokkos::DualView<FFT_SCALAR**,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d;
typedef tdual_FFT_SCALAR_2d::t_dev t_FFT_SCALAR_2d;
typedef Kokkos::DualView<FFT_SCALAR**[3],Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d_3;
typedef tdual_FFT_SCALAR_2d_3::t_dev t_FFT_SCALAR_2d_3;
typedef Kokkos::DualView<FFT_SCALAR***,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_3d;
typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d;
typedef Kokkos::
DualView<FFT_SCALAR*[2], Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_DATA_1d;
typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d;
typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um;
typedef Kokkos::
DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_64;
typedef tdual_int_64::t_dev t_int_64;
typedef tdual_int_64::t_dev_um t_int_64_um;
};
#ifdef KOKKOS_HAVE_CUDA
@ -801,6 +844,33 @@ typedef tdual_neighbors_2d::t_host_um t_neighbors_2d_um;
typedef tdual_neighbors_2d::t_host_const_um t_neighbors_2d_const_um;
typedef tdual_neighbors_2d::t_host_const_randomread t_neighbors_2d_randomread;
//Kspace
typedef Kokkos::
DualView<FFT_SCALAR*, Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_SCALAR_1d;
typedef tdual_FFT_SCALAR_1d::t_host t_FFT_SCALAR_1d;
typedef tdual_FFT_SCALAR_1d::t_host_um t_FFT_SCALAR_1d_um;
typedef Kokkos::DualView<FFT_SCALAR**,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d;
typedef tdual_FFT_SCALAR_2d::t_host t_FFT_SCALAR_2d;
typedef Kokkos::DualView<FFT_SCALAR**[3],Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d_3;
typedef tdual_FFT_SCALAR_2d_3::t_host t_FFT_SCALAR_2d_3;
typedef Kokkos::DualView<FFT_SCALAR***,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_3d;
typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d;
typedef Kokkos::
DualView<FFT_SCALAR*[2], Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_DATA_1d;
typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d;
typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um;
typedef Kokkos::
DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_64;
typedef tdual_int_64::t_host t_int_64;
typedef tdual_int_64::t_host_um t_int_64_um;
};
#endif
//default LAMMPS Types

View File

@ -0,0 +1,534 @@
#include <math.h>
#include <stdint.h>
#include "math_special_kokkos.h"
using namespace LAMMPS_NS;
/* Library libcerf:
* Compute complex error functions, based on a new implementation of
* Faddeeva's w_of_z. Also provide Dawson and Voigt functions.
*
* File erfcx.c:
* Compute erfcx(x) = exp(x^2) erfc(x) function, for real x,
* using a novel algorithm that is much faster than DERFC of SLATEC.
* This function is used in the computation of Faddeeva, Dawson, and
* other complex error functions.
*
* Copyright:
* (C) 2012 Massachusetts Institute of Technology
* (C) 2013 Forschungszentrum Jülich GmbH
*
* Licence:
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* Authors:
* Steven G. Johnson, Massachusetts Institute of Technology, 2012, core author
* Joachim Wuttke, Forschungszentrum Jülich, 2013, package maintainer
*
* Website:
* http://apps.jcns.fz-juelich.de/libcerf
*
* Revision history:
* ../CHANGELOG
*
* Manual page:
* man 3 erfcx
*/
/******************************************************************************/
/* Lookup-table for Chebyshev polynomials for smaller |x| */
/******************************************************************************/
double MathSpecialKokkos::erfcx_y100(const double y100)
{
// Steven G. Johnson, October 2012.
// Given y100=100*y, where y = 4/(4+x) for x >= 0, compute erfc(x).
// Uses a look-up table of 100 different Chebyshev polynomials
// for y intervals [0,0.01], [0.01,0.02], ...., [0.99,1], generated
// with the help of Maple and a little shell script. This allows
// the Chebyshev polynomials to be of significantly lower degree (about 1/4)
// compared to fitting the whole [0,1] interval with a single polynomial.
switch ((int) y100) {
case 0: {
double t = 2*y100 - 1;
return 0.70878032454106438663e-3 + (0.71234091047026302958e-3 + (0.35779077297597742384e-5 + (0.17403143962587937815e-7 + (0.81710660047307788845e-10 + (0.36885022360434957634e-12 + 0.15917038551111111111e-14 * t) * t) * t) * t) * t) * t;
}
case 1: {
double t = 2*y100 - 3;
return 0.21479143208285144230e-2 + (0.72686402367379996033e-3 + (0.36843175430938995552e-5 + (0.18071841272149201685e-7 + (0.85496449296040325555e-10 + (0.38852037518534291510e-12 + 0.16868473576888888889e-14 * t) * t) * t) * t) * t) * t;
}
case 2: {
double t = 2*y100 - 5;
return 0.36165255935630175090e-2 + (0.74182092323555510862e-3 + (0.37948319957528242260e-5 + (0.18771627021793087350e-7 + (0.89484715122415089123e-10 + (0.40935858517772440862e-12 + 0.17872061464888888889e-14 * t) * t) * t) * t) * t) * t;
}
case 3: {
double t = 2*y100 - 7;
return 0.51154983860031979264e-2 + (0.75722840734791660540e-3 + (0.39096425726735703941e-5 + (0.19504168704300468210e-7 + (0.93687503063178993915e-10 + (0.43143925959079664747e-12 + 0.18939926435555555556e-14 * t) * t) * t) * t) * t) * t;
}
case 4: {
double t = 2*y100 - 9;
return 0.66457513172673049824e-2 + (0.77310406054447454920e-3 + (0.40289510589399439385e-5 + (0.20271233238288381092e-7 + (0.98117631321709100264e-10 + (0.45484207406017752971e-12 + 0.20076352213333333333e-14 * t) * t) * t) * t) * t) * t;
}
case 5: {
double t = 2*y100 - 11;
return 0.82082389970241207883e-2 + (0.78946629611881710721e-3 + (0.41529701552622656574e-5 + (0.21074693344544655714e-7 + (0.10278874108587317989e-9 + (0.47965201390613339638e-12 + 0.21285907413333333333e-14 * t) * t) * t) * t) * t) * t;
}
case 6: {
double t = 2*y100 - 13;
return 0.98039537275352193165e-2 + (0.80633440108342840956e-3 + (0.42819241329736982942e-5 + (0.21916534346907168612e-7 + (0.10771535136565470914e-9 + (0.50595972623692822410e-12 + 0.22573462684444444444e-14 * t) * t) * t) * t) * t) * t;
}
case 7: {
double t = 2*y100 - 15;
return 0.11433927298290302370e-1 + (0.82372858383196561209e-3 + (0.44160495311765438816e-5 + (0.22798861426211986056e-7 + (0.11291291745879239736e-9 + (0.53386189365816880454e-12 + 0.23944209546666666667e-14 * t) * t) * t) * t) * t) * t;
}
case 8: {
double t = 2*y100 - 17;
return 0.13099232878814653979e-1 + (0.84167002467906968214e-3 + (0.45555958988457506002e-5 + (0.23723907357214175198e-7 + (0.11839789326602695603e-9 + (0.56346163067550237877e-12 + 0.25403679644444444444e-14 * t) * t) * t) * t) * t) * t;
}
case 9: {
double t = 2*y100 - 19;
return 0.14800987015587535621e-1 + (0.86018092946345943214e-3 + (0.47008265848816866105e-5 + (0.24694040760197315333e-7 + (0.12418779768752299093e-9 + (0.59486890370320261949e-12 + 0.26957764568888888889e-14 * t) * t) * t) * t) * t) * t;
}
case 10: {
double t = 2*y100 - 21;
return 0.16540351739394069380e-1 + (0.87928458641241463952e-3 + (0.48520195793001753903e-5 + (0.25711774900881709176e-7 + (0.13030128534230822419e-9 + (0.62820097586874779402e-12 + 0.28612737351111111111e-14 * t) * t) * t) * t) * t) * t;
}
case 11: {
double t = 2*y100 - 23;
return 0.18318536789842392647e-1 + (0.89900542647891721692e-3 + (0.50094684089553365810e-5 + (0.26779777074218070482e-7 + (0.13675822186304615566e-9 + (0.66358287745352705725e-12 + 0.30375273884444444444e-14 * t) * t) * t) * t) * t) * t;
}
case 12: {
double t = 2*y100 - 25;
return 0.20136801964214276775e-1 + (0.91936908737673676012e-3 + (0.51734830914104276820e-5 + (0.27900878609710432673e-7 + (0.14357976402809042257e-9 + (0.70114790311043728387e-12 + 0.32252476000000000000e-14 * t) * t) * t) * t) * t) * t;
}
case 13: {
double t = 2*y100 - 27;
return 0.21996459598282740954e-1 + (0.94040248155366777784e-3 + (0.53443911508041164739e-5 + (0.29078085538049374673e-7 + (0.15078844500329731137e-9 + (0.74103813647499204269e-12 + 0.34251892320000000000e-14 * t) * t) * t) * t) * t) * t;
}
case 14: {
double t = 2*y100 - 29;
return 0.23898877187226319502e-1 + (0.96213386835900177540e-3 + (0.55225386998049012752e-5 + (0.30314589961047687059e-7 + (0.15840826497296335264e-9 + (0.78340500472414454395e-12 + 0.36381553564444444445e-14 * t) * t) * t) * t) * t) * t;
}
case 15: {
double t = 2*y100 - 31;
return 0.25845480155298518485e-1 + (0.98459293067820123389e-3 + (0.57082915920051843672e-5 + (0.31613782169164830118e-7 + (0.16646478745529630813e-9 + (0.82840985928785407942e-12 + 0.38649975768888888890e-14 * t) * t) * t) * t) * t) * t;
}
case 16: {
double t = 2*y100 - 33;
return 0.27837754783474696598e-1 + (0.10078108563256892757e-2 + (0.59020366493792212221e-5 + (0.32979263553246520417e-7 + (0.17498524159268458073e-9 + (0.87622459124842525110e-12 + 0.41066206488888888890e-14 * t) * t) * t) * t) * t) * t;
}
case 17: {
double t = 2*y100 - 35;
return 0.29877251304899307550e-1 + (0.10318204245057349310e-2 + (0.61041829697162055093e-5 + (0.34414860359542720579e-7 + (0.18399863072934089607e-9 + (0.92703227366365046533e-12 + 0.43639844053333333334e-14 * t) * t) * t) * t) * t) * t;
}
case 18: {
double t = 2*y100 - 37;
return 0.31965587178596443475e-1 + (0.10566560976716574401e-2 + (0.63151633192414586770e-5 + (0.35924638339521924242e-7 + (0.19353584758781174038e-9 + (0.98102783859889264382e-12 + 0.46381060817777777779e-14 * t) * t) * t) * t) * t) * t;
}
case 19: {
double t = 2*y100 - 39;
return 0.34104450552588334840e-1 + (0.10823541191350532574e-2 + (0.65354356159553934436e-5 + (0.37512918348533521149e-7 + (0.20362979635817883229e-9 + (0.10384187833037282363e-11 + 0.49300625262222222221e-14 * t) * t) * t) * t) * t) * t;
}
case 20: {
double t = 2*y100 - 41;
return 0.36295603928292425716e-1 + (0.11089526167995268200e-2 + (0.67654845095518363577e-5 + (0.39184292949913591646e-7 + (0.21431552202133775150e-9 + (0.10994259106646731797e-11 + 0.52409949102222222221e-14 * t) * t) * t) * t) * t) * t;
}
case 21: {
double t = 2*y100 - 43;
return 0.38540888038840509795e-1 + (0.11364917134175420009e-2 + (0.70058230641246312003e-5 + (0.40943644083718586939e-7 + (0.22563034723692881631e-9 + (0.11642841011361992885e-11 + 0.55721092871111111110e-14 * t) * t) * t) * t) * t) * t;
}
case 22: {
double t = 2*y100 - 45;
return 0.40842225954785960651e-1 + (0.11650136437945673891e-2 + (0.72569945502343006619e-5 + (0.42796161861855042273e-7 + (0.23761401711005024162e-9 + (0.12332431172381557035e-11 + 0.59246802364444444445e-14 * t) * t) * t) * t) * t) * t;
}
case 23: {
double t = 2*y100 - 47;
return 0.43201627431540222422e-1 + (0.11945628793917272199e-2 + (0.75195743532849206263e-5 + (0.44747364553960993492e-7 + (0.25030885216472953674e-9 + (0.13065684400300476484e-11 + 0.63000532853333333334e-14 * t) * t) * t) * t) * t) * t;
}
case 24: {
double t = 2*y100 - 49;
return 0.45621193513810471438e-1 + (0.12251862608067529503e-2 + (0.77941720055551920319e-5 + (0.46803119830954460212e-7 + (0.26375990983978426273e-9 + (0.13845421370977119765e-11 + 0.66996477404444444445e-14 * t) * t) * t) * t) * t) * t;
}
case 25: {
double t = 2*y100 - 51;
return 0.48103121413299865517e-1 + (0.12569331386432195113e-2 + (0.80814333496367673980e-5 + (0.48969667335682018324e-7 + (0.27801515481905748484e-9 + (0.14674637611609884208e-11 + 0.71249589351111111110e-14 * t) * t) * t) * t) * t) * t;
}
case 26: {
double t = 2*y100 - 53;
return 0.50649709676983338501e-1 + (0.12898555233099055810e-2 + (0.83820428414568799654e-5 + (0.51253642652551838659e-7 + (0.29312563849675507232e-9 + (0.15556512782814827846e-11 + 0.75775607822222222221e-14 * t) * t) * t) * t) * t) * t;
}
case 27: {
double t = 2*y100 - 55;
return 0.53263363664388864181e-1 + (0.13240082443256975769e-2 + (0.86967260015007658418e-5 + (0.53662102750396795566e-7 + (0.30914568786634796807e-9 + (0.16494420240828493176e-11 + 0.80591079644444444445e-14 * t) * t) * t) * t) * t) * t;
}
case 28: {
double t = 2*y100 - 57;
return 0.55946601353500013794e-1 + (0.13594491197408190706e-2 + (0.90262520233016380987e-5 + (0.56202552975056695376e-7 + (0.32613310410503135996e-9 + (0.17491936862246367398e-11 + 0.85713381688888888890e-14 * t) * t) * t) * t) * t) * t;
}
case 29: {
double t = 2*y100 - 59;
return 0.58702059496154081813e-1 + (0.13962391363223647892e-2 + (0.93714365487312784270e-5 + (0.58882975670265286526e-7 + (0.34414937110591753387e-9 + (0.18552853109751857859e-11 + 0.91160736711111111110e-14 * t) * t) * t) * t) * t) * t;
}
case 30: {
double t = 2*y100 - 61;
return 0.61532500145144778048e-1 + (0.14344426411912015247e-2 + (0.97331446201016809696e-5 + (0.61711860507347175097e-7 + (0.36325987418295300221e-9 + (0.19681183310134518232e-11 + 0.96952238400000000000e-14 * t) * t) * t) * t) * t) * t;
}
case 31: {
double t = 2*y100 - 63;
return 0.64440817576653297993e-1 + (0.14741275456383131151e-2 + (0.10112293819576437838e-4 + (0.64698236605933246196e-7 + (0.38353412915303665586e-9 + (0.20881176114385120186e-11 + 0.10310784480000000000e-13 * t) * t) * t) * t) * t) * t;
}
case 32: {
double t = 2*y100 - 65;
return 0.67430045633130393282e-1 + (0.15153655418916540370e-2 + (0.10509857606888328667e-4 + (0.67851706529363332855e-7 + (0.40504602194811140006e-9 + (0.22157325110542534469e-11 + 0.10964842115555555556e-13 * t) * t) * t) * t) * t) * t;
}
case 33: {
double t = 2*y100 - 67;
return 0.70503365513338850709e-1 + (0.15582323336495709827e-2 + (0.10926868866865231089e-4 + (0.71182482239613507542e-7 + (0.42787405890153386710e-9 + (0.23514379522274416437e-11 + 0.11659571751111111111e-13 * t) * t) * t) * t) * t) * t;
}
case 34: {
double t = 2*y100 - 69;
return 0.73664114037944596353e-1 + (0.16028078812438820413e-2 + (0.11364423678778207991e-4 + (0.74701423097423182009e-7 + (0.45210162777476488324e-9 + (0.24957355004088569134e-11 + 0.12397238257777777778e-13 * t) * t) * t) * t) * t) * t;
}
case 35: {
double t = 2*y100 - 71;
return 0.76915792420819562379e-1 + (0.16491766623447889354e-2 + (0.11823685320041302169e-4 + (0.78420075993781544386e-7 + (0.47781726956916478925e-9 + (0.26491544403815724749e-11 + 0.13180196462222222222e-13 * t) * t) * t) * t) * t) * t;
}
case 36: {
double t = 2*y100 - 73;
return 0.80262075578094612819e-1 + (0.16974279491709504117e-2 + (0.12305888517309891674e-4 + (0.82350717698979042290e-7 + (0.50511496109857113929e-9 + (0.28122528497626897696e-11 + 0.14010889635555555556e-13 * t) * t) * t) * t) * t) * t;
}
case 37: {
double t = 2*y100 - 75;
return 0.83706822008980357446e-1 + (0.17476561032212656962e-2 + (0.12812343958540763368e-4 + (0.86506399515036435592e-7 + (0.53409440823869467453e-9 + (0.29856186620887555043e-11 + 0.14891851591111111111e-13 * t) * t) * t) * t) * t) * t;
}
case 38: {
double t = 2*y100 - 77;
return 0.87254084284461718231e-1 + (0.17999608886001962327e-2 + (0.13344443080089492218e-4 + (0.90900994316429008631e-7 + (0.56486134972616465316e-9 + (0.31698707080033956934e-11 + 0.15825697795555555556e-13 * t) * t) * t) * t) * t) * t;
}
case 39: {
double t = 2*y100 - 79;
return 0.90908120182172748487e-1 + (0.18544478050657699758e-2 + (0.13903663143426120077e-4 + (0.95549246062549906177e-7 + (0.59752787125242054315e-9 + (0.33656597366099099413e-11 + 0.16815130613333333333e-13 * t) * t) * t) * t) * t) * t;
}
case 40: {
double t = 2*y100 - 81;
return 0.94673404508075481121e-1 + (0.19112284419887303347e-2 + (0.14491572616545004930e-4 + (0.10046682186333613697e-6 + (0.63221272959791000515e-9 + (0.35736693975589130818e-11 + 0.17862931591111111111e-13 * t) * t) * t) * t) * t) * t;
}
case 41: {
double t = 2*y100 - 83;
return 0.98554641648004456555e-1 + (0.19704208544725622126e-2 + (0.15109836875625443935e-4 + (0.10567036667675984067e-6 + (0.66904168640019354565e-9 + (0.37946171850824333014e-11 + 0.18971959040000000000e-13 * t) * t) * t) * t) * t) * t;
}
case 42: {
double t = 2*y100 - 85;
return 0.10255677889470089531e0 + (0.20321499629472857418e-2 + (0.15760224242962179564e-4 + (0.11117756071353507391e-6 + (0.70814785110097658502e-9 + (0.40292553276632563925e-11 + 0.20145143075555555556e-13 * t) * t) * t) * t) * t) * t;
}
case 43: {
double t = 2*y100 - 87;
return 0.10668502059865093318e0 + (0.20965479776148731610e-2 + (0.16444612377624983565e-4 + (0.11700717962026152749e-6 + (0.74967203250938418991e-9 + (0.42783716186085922176e-11 + 0.21385479360000000000e-13 * t) * t) * t) * t) * t) * t;
}
case 44: {
double t = 2*y100 - 89;
return 0.11094484319386444474e0 + (0.21637548491908170841e-2 + (0.17164995035719657111e-4 + (0.12317915750735938089e-6 + (0.79376309831499633734e-9 + (0.45427901763106353914e-11 + 0.22696025653333333333e-13 * t) * t) * t) * t) * t) * t;
}
case 45: {
double t = 2*y100 - 91;
return 0.11534201115268804714e0 + (0.22339187474546420375e-2 + (0.17923489217504226813e-4 + (0.12971465288245997681e-6 + (0.84057834180389073587e-9 + (0.48233721206418027227e-11 + 0.24079890062222222222e-13 * t) * t) * t) * t) * t) * t;
}
case 46: {
double t = 2*y100 - 93;
return 0.11988259392684094740e0 + (0.23071965691918689601e-2 + (0.18722342718958935446e-4 + (0.13663611754337957520e-6 + (0.89028385488493287005e-9 + (0.51210161569225846701e-11 + 0.25540227111111111111e-13 * t) * t) * t) * t) * t) * t;
}
case 47: {
double t = 2*y100 - 95;
return 0.12457298393509812907e0 + (0.23837544771809575380e-2 + (0.19563942105711612475e-4 + (0.14396736847739470782e-6 + (0.94305490646459247016e-9 + (0.54366590583134218096e-11 + 0.27080225920000000000e-13 * t) * t) * t) * t) * t) * t;
}
case 48: {
double t = 2*y100 - 97;
return 0.12941991566142438816e0 + (0.24637684719508859484e-2 + (0.20450821127475879816e-4 + (0.15173366280523906622e-6 + (0.99907632506389027739e-9 + (0.57712760311351625221e-11 + 0.28703099555555555556e-13 * t) * t) * t) * t) * t) * t;
}
case 49: {
double t = 2*y100 - 99;
return 0.13443048593088696613e0 + (0.25474249981080823877e-2 + (0.21385669591362915223e-4 + (0.15996177579900443030e-6 + (0.10585428844575134013e-8 + (0.61258809536787882989e-11 + 0.30412080142222222222e-13 * t) * t) * t) * t) * t) * t;
}
case 50: {
double t = 2*y100 - 101;
return 0.13961217543434561353e0 + (0.26349215871051761416e-2 + (0.22371342712572567744e-4 + (0.16868008199296822247e-6 + (0.11216596910444996246e-8 + (0.65015264753090890662e-11 + 0.32210394506666666666e-13 * t) * t) * t) * t) * t) * t;
}
case 51: {
double t = 2*y100 - 103;
return 0.14497287157673800690e0 + (0.27264675383982439814e-2 + (0.23410870961050950197e-4 + (0.17791863939526376477e-6 + (0.11886425714330958106e-8 + (0.68993039665054288034e-11 + 0.34101266222222222221e-13 * t) * t) * t) * t) * t) * t;
}
case 52: {
double t = 2*y100 - 105;
return 0.15052089272774618151e0 + (0.28222846410136238008e-2 + (0.24507470422713397006e-4 + (0.18770927679626136909e-6 + (0.12597184587583370712e-8 + (0.73203433049229821618e-11 + 0.36087889048888888890e-13 * t) * t) * t) * t) * t) * t;
}
case 53: {
double t = 2*y100 - 107;
return 0.15626501395774612325e0 + (0.29226079376196624949e-2 + (0.25664553693768450545e-4 + (0.19808568415654461964e-6 + (0.13351257759815557897e-8 + (0.77658124891046760667e-11 + 0.38173420035555555555e-13 * t) * t) * t) * t) * t) * t;
}
case 54: {
double t = 2*y100 - 109;
return 0.16221449434620737567e0 + (0.30276865332726475672e-2 + (0.26885741326534564336e-4 + (0.20908350604346384143e-6 + (0.14151148144240728728e-8 + (0.82369170665974313027e-11 + 0.40360957457777777779e-13 * t) * t) * t) * t) * t) * t;
}
case 55: {
double t = 2*y100 - 111;
return 0.16837910595412130659e0 + (0.31377844510793082301e-2 + (0.28174873844911175026e-4 + (0.22074043807045782387e-6 + (0.14999481055996090039e-8 + (0.87348993661930809254e-11 + 0.42653528977777777779e-13 * t) * t) * t) * t) * t) * t;
}
case 56: {
double t = 2*y100 - 113;
return 0.17476916455659369953e0 + (0.32531815370903068316e-2 + (0.29536024347344364074e-4 + (0.23309632627767074202e-6 + (0.15899007843582444846e-8 + (0.92610375235427359475e-11 + 0.45054073102222222221e-13 * t) * t) * t) * t) * t) * t;
}
case 57: {
double t = 2*y100 - 115;
return 0.18139556223643701364e0 + (0.33741744168096996041e-2 + (0.30973511714709500836e-4 + (0.24619326937592290996e-6 + (0.16852609412267750744e-8 + (0.98166442942854895573e-11 + 0.47565418097777777779e-13 * t) * t) * t) * t) * t) * t;
}
case 58: {
double t = 2*y100 - 117;
return 0.18826980194443664549e0 + (0.35010775057740317997e-2 + (0.32491914440014267480e-4 + (0.26007572375886319028e-6 + (0.17863299617388376116e-8 + (0.10403065638343878679e-10 + 0.50190265831111111110e-13 * t) * t) * t) * t) * t) * t;
}
case 59: {
double t = 2*y100 - 119;
return 0.19540403413693967350e0 + (0.36342240767211326315e-2 + (0.34096085096200907289e-4 + (0.27479061117017637474e-6 + (0.18934228504790032826e-8 + (0.11021679075323598664e-10 + 0.52931171733333333334e-13 * t) * t) * t) * t) * t) * t;
}
case 60: {
double t = 2*y100 - 121;
return 0.20281109560651886959e0 + (0.37739673859323597060e-2 + (0.35791165457592409054e-4 + (0.29038742889416172404e-6 + (0.20068685374849001770e-8 + (0.11673891799578381999e-10 + 0.55790523093333333334e-13 * t) * t) * t) * t) * t) * t;
}
case 61: {
double t = 2*y100 - 123;
return 0.21050455062669334978e0 + (0.39206818613925652425e-2 + (0.37582602289680101704e-4 + (0.30691836231886877385e-6 + (0.21270101645763677824e-8 + (0.12361138551062899455e-10 + 0.58770520160000000000e-13 * t) * t) * t) * t) * t) * t;
}
case 62: {
double t = 2*y100 - 125;
return 0.21849873453703332479e0 + (0.40747643554689586041e-2 + (0.39476163820986711501e-4 + (0.32443839970139918836e-6 + (0.22542053491518680200e-8 + (0.13084879235290858490e-10 + 0.61873153262222222221e-13 * t) * t) * t) * t) * t) * t;
}
case 63: {
double t = 2*y100 - 127;
return 0.22680879990043229327e0 + (0.42366354648628516935e-2 + (0.41477956909656896779e-4 + (0.34300544894502810002e-6 + (0.23888264229264067658e-8 + (0.13846596292818514601e-10 + 0.65100183751111111110e-13 * t) * t) * t) * t) * t) * t;
}
case 64: {
double t = 2*y100 - 129;
return 0.23545076536988703937e0 + (0.44067409206365170888e-2 + (0.43594444916224700881e-4 + (0.36268045617760415178e-6 + (0.25312606430853202748e-8 + (0.14647791812837903061e-10 + 0.68453122631111111110e-13 * t) * t) * t) * t) * t) * t;
}
case 65: {
double t = 2*y100 - 131;
return 0.24444156740777432838e0 + (0.45855530511605787178e-2 + (0.45832466292683085475e-4 + (0.38352752590033030472e-6 + (0.26819103733055603460e-8 + (0.15489984390884756993e-10 + 0.71933206364444444445e-13 * t) * t) * t) * t) * t) * t;
}
case 66: {
double t = 2*y100 - 133;
return 0.25379911500634264643e0 + (0.47735723208650032167e-2 + (0.48199253896534185372e-4 + (0.40561404245564732314e-6 + (0.28411932320871165585e-8 + (0.16374705736458320149e-10 + 0.75541379822222222221e-13 * t) * t) * t) * t) * t) * t;
}
case 67: {
double t = 2*y100 - 135;
return 0.26354234756393613032e0 + (0.49713289477083781266e-2 + (0.50702455036930367504e-4 + (0.42901079254268185722e-6 + (0.30095422058900481753e-8 + (0.17303497025347342498e-10 + 0.79278273368888888890e-13 * t) * t) * t) * t) * t) * t;
}
case 68: {
double t = 2*y100 - 137;
return 0.27369129607732343398e0 + (0.51793846023052643767e-2 + (0.53350152258326602629e-4 + (0.45379208848865015485e-6 + (0.31874057245814381257e-8 + (0.18277905010245111046e-10 + 0.83144182364444444445e-13 * t) * t) * t) * t) * t) * t;
}
case 69: {
double t = 2*y100 - 139;
return 0.28426714781640316172e0 + (0.53983341916695141966e-2 + (0.56150884865255810638e-4 + (0.48003589196494734238e-6 + (0.33752476967570796349e-8 + (0.19299477888083469086e-10 + 0.87139049137777777779e-13 * t) * t) * t) * t) * t) * t;
}
case 70: {
double t = 2*y100 - 141;
return 0.29529231465348519920e0 + (0.56288077305420795663e-2 + (0.59113671189913307427e-4 + (0.50782393781744840482e-6 + (0.35735475025851713168e-8 + (0.20369760937017070382e-10 + 0.91262442613333333334e-13 * t) * t) * t) * t) * t) * t;
}
case 71: {
double t = 2*y100 - 143;
return 0.30679050522528838613e0 + (0.58714723032745403331e-2 + (0.62248031602197686791e-4 + (0.53724185766200945789e-6 + (0.37827999418960232678e-8 + (0.21490291930444538307e-10 + 0.95513539182222222221e-13 * t) * t) * t) * t) * t) * t;
}
case 72: {
double t = 2*y100 - 145;
return 0.31878680111173319425e0 + (0.61270341192339103514e-2 + (0.65564012259707640976e-4 + (0.56837930287837738996e-6 + (0.40035151353392378882e-8 + (0.22662596341239294792e-10 + 0.99891109760000000000e-13 * t) * t) * t) * t) * t) * t;
}
case 73: {
double t = 2*y100 - 147;
return 0.33130773722152622027e0 + (0.63962406646798080903e-2 + (0.69072209592942396666e-4 + (0.60133006661885941812e-6 + (0.42362183765883466691e-8 + (0.23888182347073698382e-10 + 0.10439349811555555556e-12 * t) * t) * t) * t) * t) * t;
}
case 74: {
double t = 2*y100 - 149;
return 0.34438138658041336523e0 + (0.66798829540414007258e-2 + (0.72783795518603561144e-4 + (0.63619220443228800680e-6 + (0.44814499336514453364e-8 + (0.25168535651285475274e-10 + 0.10901861383111111111e-12 * t) * t) * t) * t) * t) * t;
}
case 75: {
double t = 2*y100 - 151;
return 0.35803744972380175583e0 + (0.69787978834882685031e-2 + (0.76710543371454822497e-4 + (0.67306815308917386747e-6 + (0.47397647975845228205e-8 + (0.26505114141143050509e-10 + 0.11376390933333333333e-12 * t) * t) * t) * t) * t) * t;
}
case 76: {
double t = 2*y100 - 153;
return 0.37230734890119724188e0 + (0.72938706896461381003e-2 + (0.80864854542670714092e-4 + (0.71206484718062688779e-6 + (0.50117323769745883805e-8 + (0.27899342394100074165e-10 + 0.11862637614222222222e-12 * t) * t) * t) * t) * t) * t;
}
case 77: {
double t = 2*y100 - 155;
return 0.38722432730555448223e0 + (0.76260375162549802745e-2 + (0.85259785810004603848e-4 + (0.75329383305171327677e-6 + (0.52979361368388119355e-8 + (0.29352606054164086709e-10 + 0.12360253370666666667e-12 * t) * t) * t) * t) * t) * t;
}
case 78: {
double t = 2*y100 - 157;
return 0.40282355354616940667e0 + (0.79762880915029728079e-2 + (0.89909077342438246452e-4 + (0.79687137961956194579e-6 + (0.55989731807360403195e-8 + (0.30866246101464869050e-10 + 0.12868841946666666667e-12 * t) * t) * t) * t) * t) * t;
}
case 79: {
double t = 2*y100 - 159;
return 0.41914223158913787649e0 + (0.83456685186950463538e-2 + (0.94827181359250161335e-4 + (0.84291858561783141014e-6 + (0.59154537751083485684e-8 + (0.32441553034347469291e-10 + 0.13387957943111111111e-12 * t) * t) * t) * t) * t) * t;
}
case 80: {
double t = 2*y100 - 161;
return 0.43621971639463786896e0 + (0.87352841828289495773e-2 + (0.10002929142066799966e-3 + (0.89156148280219880024e-6 + (0.62480008150788597147e-8 + (0.34079760983458878910e-10 + 0.13917107176888888889e-12 * t) * t) * t) * t) * t) * t;
}
case 81: {
double t = 2*y100 - 163;
return 0.45409763548534330981e0 + (0.91463027755548240654e-2 + (0.10553137232446167258e-3 + (0.94293113464638623798e-6 + (0.65972492312219959885e-8 + (0.35782041795476563662e-10 + 0.14455745872000000000e-12 * t) * t) * t) * t) * t) * t;
}
case 82: {
double t = 2*y100 - 165;
return 0.47282001668512331468e0 + (0.95799574408860463394e-2 + (0.11135019058000067469e-3 + (0.99716373005509038080e-6 + (0.69638453369956970347e-8 + (0.37549499088161345850e-10 + 0.15003280712888888889e-12 * t) * t) * t) * t) * t) * t;
}
case 83: {
double t = 2*y100 - 167;
return 0.49243342227179841649e0 + (0.10037550043909497071e-1 + (0.11750334542845234952e-3 + (0.10544006716188967172e-5 + (0.73484461168242224872e-8 + (0.39383162326435752965e-10 + 0.15559069118222222222e-12 * t) * t) * t) * t) * t) * t;
}
case 84: {
double t = 2*y100 - 169;
return 0.51298708979209258326e0 + (0.10520454564612427224e-1 + (0.12400930037494996655e-3 + (0.11147886579371265246e-5 + (0.77517184550568711454e-8 + (0.41283980931872622611e-10 + 0.16122419680000000000e-12 * t) * t) * t) * t) * t) * t;
}
case 85: {
double t = 2*y100 - 171;
return 0.53453307979101369843e0 + (0.11030120618800726938e-1 + (0.13088741519572269581e-3 + (0.11784797595374515432e-5 + (0.81743383063044825400e-8 + (0.43252818449517081051e-10 + 0.16692592640000000000e-12 * t) * t) * t) * t) * t) * t;
}
case 86: {
double t = 2*y100 - 173;
return 0.55712643071169299478e0 + (0.11568077107929735233e-1 + (0.13815797838036651289e-3 + (0.12456314879260904558e-5 + (0.86169898078969313597e-8 + (0.45290446811539652525e-10 + 0.17268801084444444444e-12 * t) * t) * t) * t) * t) * t;
}
case 87: {
double t = 2*y100 - 175;
return 0.58082532122519320968e0 + (0.12135935999503877077e-1 + (0.14584223996665838559e-3 + (0.13164068573095710742e-5 + (0.90803643355106020163e-8 + (0.47397540713124619155e-10 + 0.17850211608888888889e-12 * t) * t) * t) * t) * t) * t;
}
case 88: {
double t = 2*y100 - 177;
return 0.60569124025293375554e0 + (0.12735396239525550361e-1 + (0.15396244472258863344e-3 + (0.13909744385382818253e-5 + (0.95651595032306228245e-8 + (0.49574672127669041550e-10 + 0.18435945564444444444e-12 * t) * t) * t) * t) * t) * t;
}
case 89: {
double t = 2*y100 - 179;
return 0.63178916494715716894e0 + (0.13368247798287030927e-1 + (0.16254186562762076141e-3 + (0.14695084048334056083e-5 + (0.10072078109604152350e-7 + (0.51822304995680707483e-10 + 0.19025081422222222222e-12 * t) * t) * t) * t) * t) * t;
}
case 90: {
double t = 2*y100 - 181;
return 0.65918774689725319200e0 + (0.14036375850601992063e-1 + (0.17160483760259706354e-3 + (0.15521885688723188371e-5 + (0.10601827031535280590e-7 + (0.54140790105837520499e-10 + 0.19616655146666666667e-12 * t) * t) * t) * t) * t) * t;
}
case 91: {
double t = 2*y100 - 183;
return 0.68795950683174433822e0 + (0.14741765091365869084e-1 + (0.18117679143520433835e-3 + (0.16392004108230585213e-5 + (0.11155116068018043001e-7 + (0.56530360194925690374e-10 + 0.20209663662222222222e-12 * t) * t) * t) * t) * t) * t;
}
case 92: {
double t = 2*y100 - 185;
return 0.71818103808729967036e0 + (0.15486504187117112279e-1 + (0.19128428784550923217e-3 + (0.17307350969359975848e-5 + (0.11732656736113607751e-7 + (0.58991125287563833603e-10 + 0.20803065333333333333e-12 * t) * t) * t) * t) * t) * t;
}
case 93: {
double t = 2*y100 - 187;
return 0.74993321911726254661e0 + (0.16272790364044783382e-1 + (0.20195505163377912645e-3 + (0.18269894883203346953e-5 + (0.12335161021630225535e-7 + (0.61523068312169087227e-10 + 0.21395783431111111111e-12 * t) * t) * t) * t) * t) * t;
}
case 94: {
double t = 2*y100 - 189;
return 0.78330143531283492729e0 + (0.17102934132652429240e-1 + (0.21321800585063327041e-3 + (0.19281661395543913713e-5 + (0.12963340087354341574e-7 + (0.64126040998066348872e-10 + 0.21986708942222222222e-12 * t) * t) * t) * t) * t) * t;
}
case 95: {
double t = 2*y100 - 191;
return 0.81837581041023811832e0 + (0.17979364149044223802e-1 + (0.22510330592753129006e-3 + (0.20344732868018175389e-5 + (0.13617902941839949718e-7 + (0.66799760083972474642e-10 + 0.22574701262222222222e-12 * t) * t) * t) * t) * t) * t;
}
case 96: {
double t = 2*y100 - 193;
return 0.85525144775685126237e0 + (0.18904632212547561026e-1 + (0.23764237370371255638e-3 + (0.21461248251306387979e-5 + (0.14299555071870523786e-7 + (0.69543803864694171934e-10 + 0.23158593688888888889e-12 * t) * t) * t) * t) * t) * t;
}
case 97: {
double t = 2*y100 - 195;
return 0.89402868170849933734e0 + (0.19881418399127202569e-1 + (0.25086793128395995798e-3 + (0.22633402747585233180e-5 + (0.15008997042116532283e-7 + (0.72357609075043941261e-10 + 0.23737194737777777778e-12 * t) * t) * t) * t) * t) * t;
}
case 98: {
double t = 2*y100 - 197;
return 0.93481333942870796363e0 + (0.20912536329780368893e-1 + (0.26481403465998477969e-3 + (0.23863447359754921676e-5 + (0.15746923065472184451e-7 + (0.75240468141720143653e-10 + 0.24309291271111111111e-12 * t) * t) * t) * t) * t) * t;
}
case 99: {
double t = 2*y100 - 199;
return 0.97771701335885035464e0 + (0.22000938572830479551e-1 + (0.27951610702682383001e-3 + (0.25153688325245314530e-5 + (0.16514019547822821453e-7 + (0.78191526829368231251e-10 + 0.24873652355555555556e-12 * t) * t) * t) * t) * t) * t;
}
}
/* we only get here if y = 1, i.e. |x| < 4*eps, in which case
* erfcx is within 1e-15 of 1.. */
return 1.0;
} /* erfcx_y100 */
/* optimizer friendly implementation of exp2(x).
*
* strategy:
*
* split argument into an integer part and a fraction:
* ipart = floor(x+0.5);
* fpart = x - ipart;
*
* compute exp2(ipart) from setting the ieee754 exponent
* compute exp2(fpart) using a pade' approximation for x in [-0.5;0.5[
*
* the result becomes: exp2(x) = exp2(ipart) * exp2(fpart)
*/
/* IEEE 754 double precision floating point data manipulation */
typedef union
{
double f;
uint64_t u;
struct {int32_t i0,i1;} s;
} udi_t;
static const double fm_exp2_q[] = {
/* 1.00000000000000000000e0, */
2.33184211722314911771e2,
4.36821166879210612817e3
};
static const double fm_exp2_p[] = {
2.30933477057345225087e-2,
2.02020656693165307700e1,
1.51390680115615096133e3
};
double MathSpecialKokkos::exp2_x86(double x)
{
double ipart, fpart, px, qx;
udi_t epart;
ipart = floor(x+0.5);
fpart = x - ipart;
epart.s.i0 = 0;
epart.s.i1 = (((int) ipart) + 1023) << 20;
x = fpart*fpart;
px = fm_exp2_p[0];
px = px*x + fm_exp2_p[1];
qx = x + fm_exp2_q[0];
px = px*x + fm_exp2_p[2];
qx = qx*x + fm_exp2_q[1];
px = px * fpart;
x = 1.0 + 2.0*(px/(qx-px));
return epart.f*x;
}

View File

@ -0,0 +1,100 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifndef LMP_MATH_SPECIAL_KOKKOS_H
#define LMP_MATH_SPECIAL_KOKKOS_H
#include <math.h>
#include "kokkos_type.h"
namespace LAMMPS_NS {
namespace MathSpecialKokkos {
// support function for scaled error function complement
extern double erfcx_y100(const double y100);
// fast 2**x function without argument checks for little endian CPUs
extern double exp2_x86(double x);
// scaled error function complement exp(x*x)*erfc(x) for coul/long styles
static inline double my_erfcx(const double x)
{
if (x >= 0.0) return erfcx_y100(400.0/(4.0+x));
else return 2.0*exp(x*x) - erfcx_y100(400.0/(4.0-x));
}
// exp(-x*x) for coul/long styles
static inline double expmsq(double x)
{
x *= x;
x *= 1.4426950408889634074; // log_2(e)
#if defined(__BYTE_ORDER__)
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
return (x < 1023.0) ? exp2_x86(-x) : 0.0;
#endif
#endif
return (x < 1023.0) ? exp2(-x) : 0.0;
}
// x**2, use instead of pow(x,2.0)
KOKKOS_INLINE_FUNCTION
static double square(const double &x) { return x*x; }
// x**3, use instead of pow(x,3.0)
KOKKOS_INLINE_FUNCTION
static double cube(const double &x) { return x*x*x; }
// return -1.0 for odd n, 1.0 for even n, like pow(-1.0,n)
KOKKOS_INLINE_FUNCTION
static double powsign(const int n) { return (n & 1) ? -1.0 : 1.0; }
// optimized version of pow(x,n) with n being integer
// up to 10x faster than pow(x,y)
KOKKOS_INLINE_FUNCTION
static double powint(const double &x, const int n) {
double yy,ww;
if (x == 0.0) return 0.0;
int nn = (n > 0) ? n : -n;
ww = x;
for (yy = 1.0; nn != 0; nn >>= 1, ww *=ww)
if (nn & 1) yy *= ww;
return (n > 0) ? yy : 1.0/yy;
}
// optimized version of (sin(x)/x)**n with n being a _positive_ integer
KOKKOS_INLINE_FUNCTION
static double powsinxx(const double &x, int n) {
double yy,ww;
if (x == 0.0) return 1.0;
ww = sin(x)/x;
for (yy = 1.0; n != 0; n >>= 1, ww *=ww)
if (n & 1) yy *= ww;
return yy;
}
}
}
#endif

View File

@ -115,6 +115,19 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
// reallocate per-atom arrays if necessary
if (eflag_atom) {
memory->destroy_kokkos(k_eatom,eatom);
memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
d_eatom = k_eatom.view<DeviceType>();
}
if (vflag_atom) {
memory->destroy_kokkos(k_vatom,vatom);
memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
d_vatom = k_vatom.view<DeviceType>();
}
atomKK->sync(execution_space,datamask_read);
k_cutsq.template sync<DeviceType>();
k_cut_ljsq.template sync<DeviceType>();
@ -169,6 +182,16 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
if (vflag_fdotr) pair_virial_fdotr_compute(this);
if (eflag_atom) {
k_eatom.template modify<DeviceType>();
k_eatom.template sync<LMPHostType>();
}
if (vflag_atom) {
k_vatom.template modify<DeviceType>();
k_vatom.template sync<LMPHostType>();
}
copymode = 0;
}

View File

@ -68,6 +68,10 @@ PairLJCutCoulLongKokkos<DeviceType>::PairLJCutCoulLongKokkos(LAMMPS *lmp):PairLJ
template<class DeviceType>
PairLJCutCoulLongKokkos<DeviceType>::~PairLJCutCoulLongKokkos()
{
memory->destroy_kokkos(k_eatom,eatom);
memory->destroy_kokkos(k_vatom,vatom);
eatom = NULL;
vatom = NULL;
if (allocated){
memory->destroy_kokkos(k_cutsq, cutsq);
memory->destroy_kokkos(k_cut_ljsq, cut_ljsq);
@ -100,6 +104,20 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = 0;
// reallocate per-atom arrays if necessary
if (eflag_atom) {
memory->destroy_kokkos(k_eatom,eatom);
memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
d_eatom = k_eatom.view<DeviceType>();
}
if (vflag_atom) {
memory->destroy_kokkos(k_vatom,vatom);
memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
d_vatom = k_vatom.view<DeviceType>();
}
atomKK->sync(execution_space,datamask_read);
k_cutsq.template sync<DeviceType>();
k_cut_ljsq.template sync<DeviceType>();
@ -151,6 +169,16 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
}
if (vflag_fdotr) pair_virial_fdotr_compute(this);
if (eflag_atom) {
k_eatom.template modify<DeviceType>();
k_eatom.template sync<LMPHostType>();
}
if (vflag_atom) {
k_vatom.template modify<DeviceType>();
k_vatom.template sync<LMPHostType>();
}
}
/* ----------------------------------------------------------------------

View File

@ -87,6 +87,9 @@ class PairLJCutCoulLongKokkos : public PairLJCutCoulLong {
typename ArrayTypes<DeviceType>::t_f_array f;
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
typename ArrayTypes<DeviceType>::t_float_1d_randomread q;
DAT::tdual_efloat_1d k_eatom;
DAT::tdual_virial_array k_vatom;
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;

View File

@ -660,6 +660,8 @@ void PairReaxCKokkos<DeviceType>::LR_vdW_Coulomb( int i, int j, double r_ij, LR_
template<class DeviceType>
void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
{
copymode = 1;
bocnt = hbcnt = 0;
eflag = eflag_in;
@ -703,8 +705,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
pvector[i] = 0.0;
}
copymode = 1;
EV_FLOAT_REAX ev;
EV_FLOAT_REAX ev_all;
@ -1363,6 +1363,23 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxZero, const int &n) const {
d_dDeltap_self(n,j) = 0.0;
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxZeroEAtom, const int &i) const {
v_eatom(i) = 0.0;
}
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairReaxCKokkos<DeviceType>::operator()(PairReaxZeroVAtom, const int &i) const {
v_vatom(i,0) = 0.0;
v_vatom(i,1) = 0.0;
v_vatom(i,2) = 0.0;
v_vatom(i,3) = 0.0;
v_vatom(i,4) = 0.0;
v_vatom(i,5) = 0.0;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
@ -2398,7 +2415,7 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeAngular<NEIGHFLAG,EV
F_FLOAT BOA_ij, BOA_ik, rij, bo_ij, bo_ik;
F_FLOAT dcos_theta_di[3], dcos_theta_dj[3], dcos_theta_dk[3];
F_FLOAT eng_tmp, fi_tmp[3], fj_tmp[3], fk_tmp[3];
F_FLOAT delij[3], delik[3];
F_FLOAT delij[3], delik[3], delji[3], delki[3];
p_val6 = gp[14];
p_val8 = gp[33];
@ -2648,8 +2665,10 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeAngular<NEIGHFLAG,EV
if (EVFLAG) {
eng_tmp = e_ang + e_pen + e_coa;
//if (eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,eng_tmp,0.0,0.0,0.0,0.0);
for (int d = 0; d < 3; d++) delki[d] = -1.0 * delik[d];
for (int d = 0; d < 3; d++) delji[d] = -1.0 * delij[d];
if (eflag_atom) this->template e_tally<NEIGHFLAG>(ev,i,j,eng_tmp);
if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj_tmp,fk_tmp,delij,delik);
if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj_tmp,fk_tmp,delji,delki);
}
}
@ -2884,31 +2903,34 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeTorsion<NEIGHFLAG,EV
if( arg > 1.0 ) arg = 1.0;
if( arg < -1.0 ) arg = -1.0;
if( sin_ijk >= 0 && sin_ijk <= 1e-10 ) sin_ijk = 1e-10;
else if( sin_ijk <= 0 && sin_ijk >= -1e-10 ) sin_ijk = -1e-10;
if( sin_jil >= 0 && sin_jil <= 1e-10 ) sin_jil = 1e-10;
else if( sin_jil <= 0 && sin_jil >= -1e-10 ) sin_jil = -1e-10;
F_FLOAT sin_ijk_rnd = sin_ijk;
F_FLOAT sin_jil_rnd = sin_jil;
if( sin_ijk >= 0 && sin_ijk <= 1e-10 ) sin_ijk_rnd = 1e-10;
else if( sin_ijk <= 0 && sin_ijk >= -1e-10 ) sin_ijk_rnd = -1e-10;
if( sin_jil >= 0 && sin_jil <= 1e-10 ) sin_jil_rnd = 1e-10;
else if( sin_jil <= 0 && sin_jil >= -1e-10 ) sin_jil_rnd = -1e-10;
// dcos_omega_di
for (int d = 0; d < 3; d++) dcos_omega_dk[d] = ((htra-arg*hnra)/rik) * delik[d] - dellk[d];
for (int d = 0; d < 3; d++) dcos_omega_dk[d] += (hthd-arg*hnhd)/sin_ijk * -dcos_ijk_dk[d];
for (int d = 0; d < 3; d++) dcos_omega_dk[d] += (hthd-arg*hnhd)/sin_ijk_rnd * -dcos_ijk_dk[d];
for (int d = 0; d < 3; d++) dcos_omega_dk[d] *= 2.0/poem;
// dcos_omega_dj
for (int d = 0; d < 3; d++) dcos_omega_di[d] = -((htra-arg*hnra)/rik) * delik[d] - htrb/rij * delij[d];
for (int d = 0; d < 3; d++) dcos_omega_di[d] += -(hthd-arg*hnhd)/sin_ijk * dcos_ijk_di[d];
for (int d = 0; d < 3; d++) dcos_omega_di[d] += -(hthe-arg*hnhe)/sin_jil * dcos_jil_di[d];
for (int d = 0; d < 3; d++) dcos_omega_di[d] += -(hthd-arg*hnhd)/sin_ijk_rnd * dcos_ijk_di[d];
for (int d = 0; d < 3; d++) dcos_omega_di[d] += -(hthe-arg*hnhe)/sin_jil_rnd * dcos_jil_di[d];
for (int d = 0; d < 3; d++) dcos_omega_di[d] *= 2.0/poem;
// dcos_omega_dk
for (int d = 0; d < 3; d++) dcos_omega_dj[d] = -((htrc-arg*hnrc)/rjl) * deljl[d] + htrb/rij * delij[d];
for (int d = 0; d < 3; d++) dcos_omega_dj[d] += -(hthd-arg*hnhd)/sin_ijk * dcos_ijk_dj[d];
for (int d = 0; d < 3; d++) dcos_omega_dj[d] += -(hthe-arg*hnhe)/sin_jil * dcos_jil_dj[d];
for (int d = 0; d < 3; d++) dcos_omega_dj[d] += -(hthd-arg*hnhd)/sin_ijk_rnd * dcos_ijk_dj[d];
for (int d = 0; d < 3; d++) dcos_omega_dj[d] += -(hthe-arg*hnhe)/sin_jil_rnd * dcos_jil_dj[d];
for (int d = 0; d < 3; d++) dcos_omega_dj[d] *= 2.0/poem;
// dcos_omega_dl
for (int d = 0; d < 3; d++) dcos_omega_dl[d] = ((htrc-arg*hnrc)/rjl) * deljl[d] + dellk[d];
for (int d = 0; d < 3; d++) dcos_omega_dl[d] += (hthe-arg*hnhe)/sin_jil * -dcos_jil_dk[d];
for (int d = 0; d < 3; d++) dcos_omega_dl[d] += (hthe-arg*hnhe)/sin_jil_rnd * -dcos_jil_dk[d];
for (int d = 0; d < 3; d++) dcos_omega_dl[d] *= 2.0/poem;
cos_omega = cos( omega );
@ -3728,12 +3750,12 @@ void PairReaxCKokkos<DeviceType>::v_tally3(EV_FLOAT_REAX &ev, const int &i, cons
F_FLOAT v[6];
v[0] = (drij[0]*fj[0] + drik[0]*fk[0]);
v[1] = (drij[1]*fj[1] + drik[1]*fk[1]);
v[2] = (drij[2]*fj[2] + drik[2]*fk[2]);
v[3] = (drij[0]*fj[1] + drik[0]*fk[1]);
v[4] = (drij[0]*fj[2] + drik[0]*fk[2]);
v[5] = (drij[1]*fj[2] + drik[1]*fk[2]);
v[0] = drij[0]*fj[0] + drik[0]*fk[0];
v[1] = drij[1]*fj[1] + drik[1]*fk[1];
v[2] = drij[2]*fj[2] + drik[2]*fk[2];
v[3] = drij[0]*fj[1] + drik[0]*fk[1];
v[4] = drij[0]*fj[2] + drik[0]*fk[2];
v[5] = drij[1]*fj[2] + drik[1]*fk[2];
if (vflag_global) {
ev.v[0] += v[0];
@ -3745,12 +3767,12 @@ void PairReaxCKokkos<DeviceType>::v_tally3(EV_FLOAT_REAX &ev, const int &i, cons
}
if (vflag_atom) {
a_vatom(i,0) += THIRD*v[0]; a_vatom(i,1) += THIRD*v[1]; a_vatom(i,2) += THIRD*v[2];
a_vatom(i,3) += THIRD*v[3]; a_vatom(i,4) += THIRD*v[4]; a_vatom(i,5) += THIRD*v[5];
a_vatom(j,0) += THIRD*v[0]; a_vatom(j,1) += THIRD*v[1]; a_vatom(j,2) += THIRD*v[2];
a_vatom(j,3) += THIRD*v[3]; a_vatom(j,4) += THIRD*v[4]; a_vatom(j,5) += THIRD*v[5];
a_vatom(k,0) += THIRD*v[0]; a_vatom(k,1) += THIRD*v[1]; a_vatom(k,2) += THIRD*v[2];
a_vatom(k,3) += THIRD*v[3]; a_vatom(k,4) += THIRD*v[4]; a_vatom(k,5) += THIRD*v[5];
a_vatom(i,0) += THIRD * v[0]; a_vatom(i,1) += THIRD * v[1]; a_vatom(i,2) += THIRD * v[2];
a_vatom(i,3) += THIRD * v[3]; a_vatom(i,4) += THIRD * v[4]; a_vatom(i,5) += THIRD * v[5];
a_vatom(j,0) += THIRD * v[0]; a_vatom(j,1) += THIRD * v[1]; a_vatom(j,2) += THIRD * v[2];
a_vatom(j,3) += THIRD * v[3]; a_vatom(j,4) += THIRD * v[4]; a_vatom(j,5) += THIRD * v[5];
a_vatom(k,0) += THIRD * v[0]; a_vatom(k,1) += THIRD * v[1]; a_vatom(k,2) += THIRD * v[2];
a_vatom(k,3) += THIRD * v[3]; a_vatom(k,4) += THIRD * v[4]; a_vatom(k,5) += THIRD * v[5];
}
}
@ -3767,12 +3789,12 @@ void PairReaxCKokkos<DeviceType>::v_tally4(EV_FLOAT_REAX &ev, const int &i, cons
// The vatom array is atomic for Half/Thread neighbor style
F_FLOAT v[6];
v[0] = 0.25 * (dril[0]*fi[0] + drjl[0]*fj[0] + drkl[0]*fk[0]);
v[1] = 0.25 * (dril[1]*fi[1] + drjl[1]*fj[1] + drkl[1]*fk[1]);
v[2] = 0.25 * (dril[2]*fi[2] + drjl[2]*fj[2] + drkl[2]*fk[2]);
v[3] = 0.25 * (dril[0]*fi[1] + drjl[0]*fj[1] + drkl[0]*fk[1]);
v[4] = 0.25 * (dril[0]*fi[2] + drjl[0]*fj[2] + drkl[0]*fk[2]);
v[5] = 0.25 * (dril[1]*fi[2] + drjl[1]*fj[2] + drkl[1]*fk[2]);
v[0] = dril[0]*fi[0] + drjl[0]*fj[0] + drkl[0]*fk[0];
v[1] = dril[1]*fi[1] + drjl[1]*fj[1] + drkl[1]*fk[1];
v[2] = dril[2]*fi[2] + drjl[2]*fj[2] + drkl[2]*fk[2];
v[3] = dril[0]*fi[1] + drjl[0]*fj[1] + drkl[0]*fk[1];
v[4] = dril[0]*fi[2] + drjl[0]*fj[2] + drkl[0]*fk[2];
v[5] = dril[1]*fi[2] + drjl[1]*fj[2] + drkl[1]*fk[2];
if (vflag_global) {
ev.v[0] += v[0];
@ -3785,14 +3807,14 @@ void PairReaxCKokkos<DeviceType>::v_tally4(EV_FLOAT_REAX &ev, const int &i, cons
if (vflag_atom) {
Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_vatom = v_vatom;
a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2];
a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5];
a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2];
a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5];
a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2];
a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5];
a_vatom(l,0) += v[0]; a_vatom(l,1) += v[1]; a_vatom(l,2) += v[2];
a_vatom(l,3) += v[3]; a_vatom(l,4) += v[4]; a_vatom(l,5) += v[5];
a_vatom(i,0) += 0.25 * v[0]; a_vatom(i,1) += 0.25 * v[1]; a_vatom(i,2) += 0.25 * v[2];
a_vatom(i,3) += 0.25 * v[3]; a_vatom(i,4) += 0.25 * v[4]; a_vatom(i,5) += 0.25 * v[5];
a_vatom(j,0) += 0.25 * v[0]; a_vatom(j,1) += 0.25 * v[1]; a_vatom(j,2) += 0.25 * v[2];
a_vatom(j,3) += 0.25 * v[3]; a_vatom(j,4) += 0.25 * v[4]; a_vatom(j,5) += 0.25 * v[5];
a_vatom(k,0) += 0.25 * v[0]; a_vatom(k,1) += 0.25 * v[1]; a_vatom(k,2) += 0.25 * v[2];
a_vatom(k,3) += 0.25 * v[3]; a_vatom(k,4) += 0.25 * v[4]; a_vatom(k,5) += 0.25 * v[5];
a_vatom(l,0) += 0.25 * v[0]; a_vatom(l,1) += 0.25 * v[1]; a_vatom(l,2) += 0.25 * v[2];
a_vatom(l,3) += 0.25 * v[3]; a_vatom(l,4) += 0.25 * v[4]; a_vatom(l,5) += 0.25 * v[5];
}
}
@ -3894,6 +3916,14 @@ void PairReaxCKokkos<DeviceType>::ev_setup(int eflag, int vflag)
if (eflag_global) eng_vdwl = eng_coul = 0.0;
if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0;
if (eflag_atom) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroEAtom>(0,maxeatom),*this);
DeviceType::fence();
}
if (vflag_atom) {
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroVAtom>(0,maxvatom),*this);
DeviceType::fence();
}
// if vflag_global = 2 and pair::compute() calls virial_fdotr_compute()
// compute global virial via (F dot r) instead of via pairwise summation

View File

@ -84,6 +84,10 @@ struct PairReaxBuildListsHalf_LessAtomics{};
struct PairReaxZero{};
struct PairReaxZeroEAtom{};
struct PairReaxZeroVAtom{};
struct PairReaxBondOrder1{};
struct PairReaxBondOrder1_LessAtomics{};
@ -172,6 +176,12 @@ class PairReaxCKokkos : public PairReaxC {
KOKKOS_INLINE_FUNCTION
void operator()(PairReaxZero, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(PairReaxZeroEAtom, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(PairReaxZeroVAtom, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(PairReaxBondOrder1, const int&) const;

3193
src/KOKKOS/pppm_kokkos.cpp Normal file

File diff suppressed because it is too large Load Diff

552
src/KOKKOS/pppm_kokkos.h Normal file
View File

@ -0,0 +1,552 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef KSPACE_CLASS
KSpaceStyle(pppm/kk,PPPMKokkos<LMPDeviceType>)
KSpaceStyle(pppm/kk/device,PPPMKokkos<LMPDeviceType>)
KSpaceStyle(pppm/kk/host,PPPMKokkos<LMPHostType>)
#else
#ifndef LMP_PPPM_KOKKOS_H
#define LMP_PPPM_KOKKOS_H
#include "pppm.h"
#include "gridcomm_kokkos.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
struct TagPPPM_setup1{};
struct TagPPPM_setup2{};
struct TagPPPM_setup3{};
struct TagPPPM_setup4{};
struct TagPPPM_compute_gf_ik{};
struct TagPPPM_compute_gf_ik_triclinic{};
struct TagPPPM_self1{};
struct TagPPPM_self2{};
struct TagPPPM_brick2fft{};
struct TagPPPM_particle_map{};
struct TagPPPM_make_rho_zero{};
struct TagPPPM_make_rho_atomic{};
struct TagPPPM_make_rho{};
struct TagPPPM_poisson_ik1{};
struct TagPPPM_poisson_ik2{};
struct TagPPPM_poisson_ik3{};
struct TagPPPM_poisson_ik4{};
struct TagPPPM_poisson_ik5{};
struct TagPPPM_poisson_ik6{};
struct TagPPPM_poisson_ik7{};
struct TagPPPM_poisson_ik8{};
struct TagPPPM_poisson_ik9{};
struct TagPPPM_poisson_ik10{};
struct TagPPPM_poisson_ik_triclinic1{};
struct TagPPPM_poisson_ik_triclinic2{};
struct TagPPPM_poisson_ik_triclinic3{};
struct TagPPPM_poisson_ik_triclinic4{};
struct TagPPPM_poisson_ik_triclinic5{};
struct TagPPPM_poisson_ik_triclinic6{};
struct TagPPPM_poisson_peratom1{};
struct TagPPPM_poisson_peratom2{};
struct TagPPPM_poisson_peratom3{};
struct TagPPPM_poisson_peratom4{};
struct TagPPPM_poisson_peratom5{};
struct TagPPPM_poisson_peratom6{};
struct TagPPPM_poisson_peratom7{};
struct TagPPPM_poisson_peratom8{};
struct TagPPPM_poisson_peratom9{};
struct TagPPPM_poisson_peratom10{};
struct TagPPPM_poisson_peratom11{};
struct TagPPPM_poisson_peratom12{};
struct TagPPPM_poisson_peratom13{};
struct TagPPPM_poisson_peratom14{};
struct TagPPPM_fieldforce_ik{};
struct TagPPPM_fieldforce_peratom{};
struct TagPPPM_pack_forward1{};
struct TagPPPM_pack_forward2{};
struct TagPPPM_unpack_forward1{};
struct TagPPPM_unpack_forward2{};
struct TagPPPM_pack_reverse{};
struct TagPPPM_unpack_reverse{};
struct TagPPPM_slabcorr1{};
struct TagPPPM_slabcorr2{};
struct TagPPPM_slabcorr3{};
struct TagPPPM_slabcorr4{};
struct TagPPPM_timing_zero{};
template<class DeviceType>
class PPPMKokkos : public PPPM {
public:
typedef DeviceType device_type;
typedef ArrayTypes<DeviceType> AT;
PPPMKokkos(class LAMMPS *, int, char **);
virtual ~PPPMKokkos();
virtual void init();
virtual void setup();
void setup_grid();
virtual void compute(int, int);
virtual int timing_1d(int, double &);
virtual int timing_3d(int, double &);
virtual double memory_usage();
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_setup1, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_setup2, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_setup3, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_setup4, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_compute_gf_ik, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_compute_gf_ik_triclinic, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_self1, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_self2, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_brick2fft, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_particle_map, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_make_rho_zero, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_make_rho_atomic, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_make_rho, typename Kokkos::TeamPolicy<DeviceType, TagPPPM_make_rho>::member_type) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik1, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik2, const int&, EV_FLOAT &) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik3, const int&, EV_FLOAT &) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik4, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik5, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik6, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik7, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik8, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik9, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik10, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik_triclinic1, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik_triclinic2, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik_triclinic3, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik_triclinic4, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik_triclinic5, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_ik_triclinic6, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom1, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom2, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom3, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom4, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom5, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom6, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom7, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom8, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom9, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom10, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom11, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom12, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom13, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_poisson_peratom14, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_fieldforce_ik, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_fieldforce_peratom, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_pack_forward1, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_pack_forward2, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_unpack_forward1, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_unpack_forward2, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_pack_reverse, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_unpack_reverse, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_slabcorr1, const int&, double&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_slabcorr2, const int&, double&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_slabcorr3, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_slabcorr4, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPPPM_timing_zero, const int&) const;
//template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
//KOKKOS_INLINE_FUNCTION
//void operator()(TagPPPMKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&) const;
protected:
double unitkx,unitky,unitkz;
double scaleinv,s2;
double qscale,efact,ffact,dipole_all,dipole_r2,zprd;
double xprd,yprd,zprd_slab;
int nbx,nby,nbz,twoorder;
int numx_fft,numy_fft,numz_fft;
int numx_inout,numy_inout,numz_inout;
int numx_out,numy_out,numz_out;
int ix,iy,nlocal;
int nx,ny,nz;
typename AT::t_int_1d_um d_list_index;
typename AT::t_FFT_SCALAR_1d_um d_buf;
DAT::tdual_int_scalar k_flag;
typename AT::t_x_array_randomread x;
typename AT::t_f_array f;
typename AT::t_float_1d_randomread q;
DAT::tdual_efloat_1d k_eatom;
DAT::tdual_virial_array k_vatom;
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
int factors[3];
typename AT::t_FFT_SCALAR_3d d_density_brick;
typename AT::t_FFT_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick;
typename AT::t_FFT_SCALAR_3d d_u_brick;
typename AT::t_FFT_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick;
typename AT::t_FFT_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick;
typename AT::t_float_1d d_greensfn;
typename AT::t_virial_array d_vg;
typename AT::t_float_1d d_fkx;
typename AT::t_float_1d d_fky;
typename AT::t_float_1d d_fkz;
DAT::tdual_FFT_SCALAR_1d k_density_fft;
DAT::tdual_FFT_SCALAR_1d k_work1;
DAT::tdual_FFT_SCALAR_1d k_work2;
typename AT::t_FFT_SCALAR_1d d_density_fft;
typename AT::t_FFT_SCALAR_1d d_work1;
typename AT::t_FFT_SCALAR_1d d_work2;
DAT::tdual_float_1d k_gf_b;
typename AT::t_float_1d d_gf_b;
//FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff;
typename AT::t_FFT_SCALAR_2d_3 d_rho1d;
DAT::tdual_FFT_SCALAR_2d k_rho_coeff;
typename AT::t_FFT_SCALAR_2d d_rho_coeff;
HAT::t_FFT_SCALAR_2d h_rho_coeff;
//double **acons;
typename Kokkos::DualView<F_FLOAT[8][7],Kokkos::LayoutRight,DeviceType>::t_host acons;
class FFT3d *fft1,*fft2;
class Remap *remap;
GridCommKokkos<DeviceType> *cg;
GridCommKokkos<DeviceType> *cg_peratom;
//int **part2grid; // storage for particle -> grid mapping
typename AT::t_int_1d_3 d_part2grid;
//double *boxlo;
double boxlo[3];
void set_grid_global();
void set_grid_local();
void adjust_gewald();
double newton_raphson_f();
double derivf();
double final_accuracy();
virtual void allocate();
virtual void allocate_peratom();
virtual void deallocate();
virtual void deallocate_peratom();
int factorable(int);
double compute_df_kspace();
double estimate_ik_error(double, double, bigint);
virtual void compute_gf_denom();
virtual void compute_gf_ik();
virtual void particle_map();
virtual void make_rho();
virtual void brick2fft();
virtual void poisson();
virtual void poisson_ik();
virtual void fieldforce();
virtual void fieldforce_ik();
virtual void poisson_peratom();
virtual void fieldforce_peratom();
void procs2grid2d(int,int,int,int *, int*);
KOKKOS_INLINE_FUNCTION
void compute_rho1d(const int i, const FFT_SCALAR &, const FFT_SCALAR &,
const FFT_SCALAR &) const;
void compute_rho_coeff();
void slabcorr();
// grid communication
virtual void pack_forward_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int);
virtual void unpack_forward_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int);
virtual void pack_reverse_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int);
virtual void unpack_reverse_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int);
// triclinic
int triclinic; // domain settings, orthog or triclinic
void setup_triclinic();
void compute_gf_ik_triclinic();
void poisson_ik_triclinic();
/* ----------------------------------------------------------------------
denominator for Hockney-Eastwood Green's function
of x,y,z = sin(kx*deltax/2), etc
inf n-1
S(n,k) = Sum W(k+pi*j)**2 = Sum b(l)*(z*z)**l
j=-inf l=0
= -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x) at z = sin(x)
gf_b = denominator expansion coeffs
------------------------------------------------------------------------- */
KOKKOS_INLINE_FUNCTION
double gf_denom(const double &x, const double &y,
const double &z) const {
double sx,sy,sz;
sz = sy = sx = 0.0;
for (int l = order-1; l >= 0; l--) {
sx = d_gf_b[l] + sx*x;
sy = d_gf_b[l] + sy*y;
sz = d_gf_b[l] + sz*z;
}
double s = sx*sy*sz;
return s*s;
};
};
}
#endif
#endif
/* ERROR/WARNING messages:
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Cannot (yet) use PPPM with triclinic box and kspace_modify diff ad
This feature is not yet supported.
E: Cannot (yet) use PPPM with triclinic box and slab correction
This feature is not yet supported.
E: Cannot use PPPM with 2d simulation
The kspace style pppm cannot be used in 2d simulations. You can use
2d PPPM in a 3d simulation; see the kspace_modify command.
E: PPPM can only currently be used with comm_style brick
This is a current restriction in LAMMPS.
E: Kspace style requires atom attribute q
The atom style defined does not have these attributes.
E: Cannot use nonperiodic boundaries with PPPM
For kspace style pppm, all 3 dimensions must have periodic boundaries
unless you use the kspace_modify command to define a 2d slab with a
non-periodic z dimension.
E: Incorrect boundaries with slab PPPM
Must have periodic x,y dimensions and non-periodic z dimension to use
2d slab option with PPPM.
E: PPPM order cannot be < 2 or > than %d
This is a limitation of the PPPM implementation in LAMMPS.
E: KSpace style is incompatible with Pair style
Setting a kspace style requires that a pair style with matching
long-range Coulombic or dispersion components be used.
E: Pair style is incompatible with TIP4P KSpace style
The pair style does not have the requires TIP4P settings.
E: Bond and angle potentials must be defined for TIP4P
Cannot use TIP4P pair potential unless bond and angle potentials
are defined.
E: Bad TIP4P angle type for PPPM/TIP4P
Specified angle type is not valid.
E: Bad TIP4P bond type for PPPM/TIP4P
Specified bond type is not valid.
E: Cannot (yet) use PPPM with triclinic box and TIP4P
This feature is not yet supported.
W: Reducing PPPM order b/c stencil extends beyond nearest neighbor processor
This may lead to a larger grid than desired. See the kspace_modify overlap
command to prevent changing of the PPPM order.
E: PPPM order < minimum allowed order
The default minimum order is 2. This can be reset by the
kspace_modify minorder command.
E: PPPM grid stencil extends beyond nearest neighbor processor
This is not allowed if the kspace_modify overlap setting is no.
E: KSpace accuracy must be > 0
The kspace accuracy designated in the input must be greater than zero.
E: Could not compute grid size
The code is unable to compute a grid size consistent with the desired
accuracy. This error should not occur for typical problems. Please
send an email to the developers.
E: PPPM grid is too large
The global PPPM grid is larger than OFFSET in one or more dimensions.
OFFSET is currently set to 4096. You likely need to decrease the
requested accuracy.
E: Could not compute g_ewald
The Newton-Raphson solver failed to converge to a good value for
g_ewald. This error should not occur for typical problems. Please
send an email to the developers.
E: Non-numeric box dimensions - simulation unstable
The box size has apparently blown up.
E: Out of range atoms - cannot compute PPPM
One or more atoms are attempting to map their charge to a PPPM grid
point that is not owned by a processor. This is likely for one of two
reasons, both of them bad. First, it may mean that an atom near the
boundary of a processor's sub-domain has moved more than 1/2 the
"neighbor skin distance"_neighbor.html without neighbor lists being
rebuilt and atoms being migrated to new processors. This also means
you may be missing pairwise interactions that need to be computed.
The solution is to change the re-neighboring criteria via the
"neigh_modify"_neigh_modify command. The safest settings are "delay 0
every 1 check yes". Second, it may mean that an atom has moved far
outside a processor's sub-domain or even the entire simulation box.
This indicates bad physics, e.g. due to highly overlapping atoms, too
large a timestep, etc.
*/

View File

@ -152,6 +152,8 @@ PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
PPPM::~PPPM()
{
if (copymode) return;
delete [] factors;
deallocate();
if (peratom_allocate_flag) deallocate_peratom();

View File

@ -1,5 +1,25 @@
Smooth Mach Dynamics -- Smooth Particle Hydrodynamics for solids and fluids
Currently, the package has the following features:
Does liquids via traditional Smooth Particle Hydrodynamics (SPH)
Also solves solids mechanics problems via a state of the art
stabilized meshless method with hourglass control.
Can specify hydrostatic interactions independently from material
strength models, i.e. pressure and deviatoric stresses are separated.
Many material models available (Johnson-Cook, plasticity with
hardening, Mie-Grueneisen, Polynomial EOS). Easy to add new material
models.
Rigid boundary conditions (walls) can be loaded as surface geometries
from *.STL files.
See the file doc/PDF/SMD_LAMMPS_userguide.pdf to get started.
There are example scripts for using this package in examples/USER/smd.
The person who created this package is Georg Ganzenmuller at the
Fraunhofer-Institute for High-Speed Dynamics, Ernst Mach Institute in
Germany (georg.ganzenmueller at emi.fhg.de). Contact him directly if

View File

@ -87,6 +87,7 @@ class ModifyKokkos : public Modify {
class DAT {
public:
typedef double tdual_xfloat_1d;
typedef double tdual_FFT_SCALAR_1d;
typedef int t_int_1d;
typedef int tdual_int_2d;
};

View File

@ -94,6 +94,7 @@ KSpace::KSpace(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
execution_space = Host;
datamask_read = ALL_MASK;
datamask_modify = ALL_MASK;
copymode = 0;
memory->create(gcons,7,7,"kspace:gcons");
gcons[2][0] = 15.0 / 8.0;
@ -149,6 +150,8 @@ KSpace::KSpace(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
KSpace::~KSpace()
{
if (copymode) return;
memory->destroy(eatom);
memory->destroy(vatom);
memory->destroy(gcons);

View File

@ -15,6 +15,7 @@
#define LMP_KSPACE_H
#include "pointers.h"
#include "accelerator_kokkos.h"
#ifdef FFT_SINGLE
typedef float FFT_SCALAR;
@ -85,6 +86,7 @@ class KSpace : protected Pointers {
// KOKKOS host/device flag and data masks
ExecutionSpace execution_space;
unsigned int datamask_read,datamask_modify;
int copymode;
int compute_flag; // 0 if skip compute()
int fftbench; // 0 if skip FFT timing
@ -124,6 +126,11 @@ class KSpace : protected Pointers {
virtual void pack_reverse(int, FFT_SCALAR *, int, int *) {};
virtual void unpack_reverse(int, FFT_SCALAR *, int, int *) {};
virtual void pack_forward_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void unpack_forward_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void pack_reverse_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual void unpack_reverse_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
virtual int timing(int, double &, double &) {return 0;}
virtual int timing_1d(int, double &) {return 0;}
virtual int timing_3d(int, double &) {return 0;}