mirror of https://github.com/lammps/lammps.git
Merge remote-tracking branch 'lammps-rw/clean-up-docs-for-sphinx' into clean-up-docs-for-sphinx
This commit is contained in:
commit
ace5dc3c7c
|
@ -302,6 +302,11 @@ sphere is determined by the <em>bflag1</em> parameter for the <em>body</em> keyw
|
|||
The <em>bflag2</em> argument is ignored.</p>
|
||||
<hr class="docutils" />
|
||||
<p><strong>Specifics of body style rounded/polygon:</strong></p>
|
||||
<div class="admonition note">
|
||||
<p class="first admonition-title">Note</p>
|
||||
<p class="last">Aug 2016 - This body style has not yet been added to LAMMPS.
|
||||
The info below is a placeholder.</p>
|
||||
</div>
|
||||
<p>The <em>rounded/polygon</em> body style represents body particles as a convex
|
||||
polygon with a variable number N > 2 of vertices, which can only be
|
||||
used for 2d models. One example use of this body style is for 2d
|
||||
|
@ -359,8 +364,9 @@ particles whose edge length is sqrt(2):</p>
|
|||
<span class="mf">1.0</span>
|
||||
</pre></div>
|
||||
</div>
|
||||
<p>The <span class="xref doc">pair_style body/rounded/polygon</span> command
|
||||
can be used with this body style to compute body/body interactions.</p>
|
||||
<p>The <span class="xref doc">pair_style body/rounded/polygon</span>
|
||||
command can be used with this body style to compute body/body
|
||||
interactions.</p>
|
||||
<p>For output purposes via the <a class="reference internal" href="compute_body_local.html"><span class="doc">compute body/local</span></a> and <a class="reference internal" href="dump.html"><span class="doc">dump local</span></a>
|
||||
commands, this body style produces one datum for each of the N
|
||||
sub-particles in a body particle. The datum has 3 values:</p>
|
||||
|
|
|
@ -481,6 +481,11 @@ change this via the <a class="reference internal" href="dump_modify.html"><span
|
|||
<p>The <em>fix</em> keyword can be used with a <a class="reference internal" href="fix.html"><span class="doc">fix</span></a> that produces
|
||||
objects to be drawn. An example is the <span class="xref doc">fix surface/global</span> command which can draw lines
|
||||
or triangles for 2d/3d simulations.</p>
|
||||
<div class="admonition note">
|
||||
<p class="first admonition-title">Note</p>
|
||||
<p class="last">Aug 2016 - The fix surface/global command is not yet added to
|
||||
LAMMPS.</p>
|
||||
</div>
|
||||
<p>The <em>fflag1</em> and <em>fflag2</em> settings are numerical values which are
|
||||
passed to the fix to affect how the drawing of its objects is done.
|
||||
See the individual fix doc page for a description of what these
|
||||
|
|
|
@ -132,7 +132,7 @@
|
|||
</pre></div>
|
||||
</div>
|
||||
<ul class="simple">
|
||||
<li>style = <em>none</em> or <em>ewald</em> or <em>ewald/disp</em> or <em>ewald/omp</em> or <em>pppm</em> or <em>pppm/cg</em> or <em>pppm/disp</em> or <em>pppm/tip4p</em> or <em>pppm/stagger</em> or <em>pppm/disp/tip4p</em> or <em>pppm/gpu</em> or <em>pppm/omp</em> or <em>pppm/cg/omp</em> or <em>pppm/tip4p/omp</em> or <em>msm</em> or <em>msm/cg</em> or <em>msm/omp</em> or <em>msm/cg/omp</em></li>
|
||||
<li>style = <em>none</em> or <em>ewald</em> or <em>ewald/disp</em> or <em>ewald/omp</em> or <em>pppm</em> or <em>pppm/cg</em> or <em>pppm/disp</em> or <em>pppm/tip4p</em> or <em>pppm/stagger</em> or <em>pppm/disp/tip4p</em> or <em>pppm/gpu</em> or <em>pppm/kk</em> or <em>pppm/omp</em> or <em>pppm/cg/omp</em> or <em>pppm/tip4p/omp</em> or <em>msm</em> or <em>msm/cg</em> or <em>msm/omp</em> or <em>msm/cg/omp</em></li>
|
||||
</ul>
|
||||
<pre class="literal-block">
|
||||
<em>none</em> value = none
|
||||
|
@ -155,6 +155,8 @@
|
|||
accuracy = desired relative error in forces
|
||||
<em>pppm/gpu</em> value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
<em>pppm/kk</em> value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
<em>pppm/omp</em> value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
<em>pppm/cg/omp</em> value = accuracy
|
||||
|
@ -385,6 +387,9 @@ If <em>pppm/gpu</em> is used with a GPU-enabled pair style, part of the PPPM
|
|||
calculation can be performed concurrently on the GPU while other
|
||||
calculations for non-bonded and bonded force calculation are performed
|
||||
on the CPU.</p>
|
||||
<p>The <em>pppm/kk</em> style also performs charge assignment and force
|
||||
interpolation calculations on the GPU while the FFTs themselves are
|
||||
calculated on the CPU in non-threaded mode.</p>
|
||||
<p>These accelerated styles are part of the GPU, USER-INTEL,
|
||||
KOKKOS, USER-OMP, and OPT packages respectively. They are only
|
||||
enabled if LAMMPS was built with those packages. See the <a class="reference internal" href="Section_start.html#start-3"><span class="std std-ref">Making LAMMPS</span></a> section for more info.</p>
|
||||
|
@ -406,10 +411,10 @@ the KSPACE package is installed by default.</p>
|
|||
<p>For MSM, a simulation must be 3d and one can use any combination of
|
||||
periodic, non-periodic, or shrink-wrapped boundaries (specified using
|
||||
the <a class="reference internal" href="boundary.html"><span class="doc">boundary</span></a> command).</p>
|
||||
<p>For Ewald and PPPM, a simulation must be 3d and periodic in all dimensions.
|
||||
The only exception is if the slab option is set with <a class="reference internal" href="kspace_modify.html"><span class="doc">kspace_modify</span></a>,
|
||||
in which case the xy dimensions must be periodic and the z dimension must be
|
||||
non-periodic.</p>
|
||||
<p>For Ewald and PPPM, a simulation must be 3d and periodic in all
|
||||
dimensions. The only exception is if the slab option is set with
|
||||
<a class="reference internal" href="kspace_modify.html"><span class="doc">kspace_modify</span></a>, in which case the xy dimensions
|
||||
must be periodic and the z dimension must be non-periodic.</p>
|
||||
</div>
|
||||
<div class="section" id="related-commands">
|
||||
<h2>Related commands</h2>
|
||||
|
@ -435,9 +440,10 @@ and Computation 5, 2322 (2009)</p>
|
|||
<p id="veld"><strong>(Veld)</strong> In ‘t Veld, Ismail, Grest, J Chem Phys, 127, 144711 (2007).</p>
|
||||
<p id="toukmaji"><strong>(Toukmaji)</strong> Toukmaji, Sagui, Board, and Darden, J Chem Phys, 113,
|
||||
10913 (2000).</p>
|
||||
<p id="isele-holder"><strong>(Isele-Holder)</strong> Isele-Holder, Mitchell, Ismail, J Chem Phys, 137, 174107 (2012).</p>
|
||||
<p id="isele-holder2"><strong>(Isele-Holder2)</strong> Isele-Holder, Mitchell, Hammond, Kohlmeyer, Ismail, J Chem Theory
|
||||
Comput 9, 5412 (2013).</p>
|
||||
<p id="isele-holder"><strong>(Isele-Holder)</strong> Isele-Holder, Mitchell, Ismail, J Chem Phys, 137,
|
||||
174107 (2012).</p>
|
||||
<p id="isele-holder2"><strong>(Isele-Holder2)</strong> Isele-Holder, Mitchell, Hammond, Kohlmeyer, Ismail,
|
||||
J Chem Theory Comput 9, 5412 (2013).</p>
|
||||
<p id="hardy"><strong>(Hardy)</strong> David Hardy thesis: Multilevel Summation for the Fast
|
||||
Evaluation of Forces for the Simulation of Biomolecules, University of
|
||||
Illinois at Urbana-Champaign, (2006).</p>
|
||||
|
|
|
@ -175,6 +175,9 @@ The {bflag2} argument is ignored.
|
|||
|
||||
[Specifics of body style rounded/polygon:]
|
||||
|
||||
NOTE: Aug 2016 - This body style has not yet been added to LAMMPS.
|
||||
The info below is a placeholder.
|
||||
|
||||
The {rounded/polygon} body style represents body particles as a convex
|
||||
polygon with a variable number N > 2 of vertices, which can only be
|
||||
used for 2d models. One example use of this body style is for 2d
|
||||
|
@ -235,8 +238,9 @@ particles whose edge length is sqrt(2):
|
|||
0 1 1 2 2 3 3 0
|
||||
1.0 :pre
|
||||
|
||||
The "pair_style body/rounded/polygon"_pair_body_rounded_polygon.html command
|
||||
can be used with this body style to compute body/body interactions.
|
||||
The "pair_style body/rounded/polygon"_pair_body_rounded_polygon.html
|
||||
command can be used with this body style to compute body/body
|
||||
interactions.
|
||||
|
||||
For output purposes via the "compute
|
||||
body/local"_compute_body_local.html and "dump local"_dump.html
|
||||
|
|
|
@ -388,6 +388,9 @@ objects to be drawn. An example is the "fix
|
|||
surface/global"_fix_surface_global.html command which can draw lines
|
||||
or triangles for 2d/3d simulations.
|
||||
|
||||
NOTE: Aug 2016 - The fix surface/global command is not yet added to
|
||||
LAMMPS.
|
||||
|
||||
The {fflag1} and {fflag2} settings are numerical values which are
|
||||
passed to the fix to affect how the drawing of its objects is done.
|
||||
See the individual fix doc page for a description of what these
|
||||
|
|
|
@ -12,7 +12,7 @@ kspace_style command :h3
|
|||
|
||||
kspace_style style value :pre
|
||||
|
||||
style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg} or {pppm/disp} or {pppm/tip4p} or {pppm/stagger} or {pppm/disp/tip4p} or {pppm/gpu} or {pppm/omp} or {pppm/cg/omp} or {pppm/tip4p/omp} or {msm} or {msm/cg} or {msm/omp} or {msm/cg/omp} :ulb,l
|
||||
style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg} or {pppm/disp} or {pppm/tip4p} or {pppm/stagger} or {pppm/disp/tip4p} or {pppm/gpu} or {pppm/kk} or {pppm/omp} or {pppm/cg/omp} or {pppm/tip4p/omp} or {msm} or {msm/cg} or {msm/omp} or {msm/cg/omp} :ulb,l
|
||||
{none} value = none
|
||||
{ewald} value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
|
@ -33,6 +33,8 @@ style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg}
|
|||
accuracy = desired relative error in forces
|
||||
{pppm/gpu} value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
{pppm/kk} value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
{pppm/omp} value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
{pppm/cg/omp} value = accuracy
|
||||
|
@ -271,6 +273,10 @@ calculation can be performed concurrently on the GPU while other
|
|||
calculations for non-bonded and bonded force calculation are performed
|
||||
on the CPU.
|
||||
|
||||
The {pppm/kk} style also performs charge assignment and force
|
||||
interpolation calculations on the GPU while the FFTs themselves are
|
||||
calculated on the CPU in non-threaded mode.
|
||||
|
||||
These accelerated styles are part of the GPU, USER-INTEL,
|
||||
KOKKOS, USER-OMP, and OPT packages respectively. They are only
|
||||
enabled if LAMMPS was built with those packages. See the "Making
|
||||
|
@ -299,10 +305,10 @@ For MSM, a simulation must be 3d and one can use any combination of
|
|||
periodic, non-periodic, or shrink-wrapped boundaries (specified using
|
||||
the "boundary"_boundary.html command).
|
||||
|
||||
For Ewald and PPPM, a simulation must be 3d and periodic in all dimensions.
|
||||
The only exception is if the slab option is set with "kspace_modify"_kspace_modify.html,
|
||||
in which case the xy dimensions must be periodic and the z dimension must be
|
||||
non-periodic.
|
||||
For Ewald and PPPM, a simulation must be 3d and periodic in all
|
||||
dimensions. The only exception is if the slab option is set with
|
||||
"kspace_modify"_kspace_modify.html, in which case the xy dimensions
|
||||
must be periodic and the z dimension must be non-periodic.
|
||||
|
||||
[Related commands:]
|
||||
|
||||
|
@ -354,11 +360,12 @@ and Computation 5, 2322 (2009)
|
|||
10913 (2000).
|
||||
|
||||
:link(Isele-Holder)
|
||||
[(Isele-Holder)] Isele-Holder, Mitchell, Ismail, J Chem Phys, 137, 174107 (2012).
|
||||
[(Isele-Holder)] Isele-Holder, Mitchell, Ismail, J Chem Phys, 137,
|
||||
174107 (2012).
|
||||
|
||||
:link(Isele-Holder2)
|
||||
[(Isele-Holder2)] Isele-Holder, Mitchell, Hammond, Kohlmeyer, Ismail, J Chem Theory
|
||||
Comput 9, 5412 (2013).
|
||||
[(Isele-Holder2)] Isele-Holder, Mitchell, Hammond, Kohlmeyer, Ismail,
|
||||
J Chem Theory Comput 9, 5412 (2013).
|
||||
|
||||
:link(Hardy)
|
||||
[(Hardy)] David Hardy thesis: Multilevel Summation for the Fast
|
||||
|
|
|
@ -87,6 +87,8 @@ action fix_setforce_kokkos.cpp
|
|||
action fix_setforce_kokkos.h
|
||||
action fix_wall_reflect_kokkos.cpp
|
||||
action fix_wall_reflect_kokkos.h
|
||||
action gridcomm_kokkos.cpp gridcomm.cpp
|
||||
action gridcomm_kokkos.h gridcomm.h
|
||||
action improper_harmonic_kokkos.cpp improper_harmonic.cpp
|
||||
action improper_harmonic_kokkos.h improper_harmonic.h
|
||||
action kokkos.cpp
|
||||
|
@ -102,6 +104,8 @@ action neigh_list_kokkos.cpp
|
|||
action neigh_list_kokkos.h
|
||||
action neighbor_kokkos.cpp
|
||||
action neighbor_kokkos.h
|
||||
action math_special_kokkos.cpp
|
||||
action math_special_kokkos.h
|
||||
action pair_buck_coul_cut_kokkos.cpp
|
||||
action pair_buck_coul_cut_kokkos.h
|
||||
action pair_buck_coul_long_kokkos.cpp pair_buck_coul_long.cpp
|
||||
|
@ -167,6 +171,8 @@ action pair_tersoff_mod_kokkos.cpp pair_tersoff_mod.cpp
|
|||
action pair_tersoff_mod_kokkos.h pair_tersoff_mod.h
|
||||
action pair_tersoff_zbl_kokkos.cpp pair_tersoff_zbl.cpp
|
||||
action pair_tersoff_zbl_kokkos.h pair_tersoff_zbl.h
|
||||
action pppm_kokkos.cpp pppm.cpp
|
||||
action pppm_kokkos.h pppm.h
|
||||
action region_block_kokkos.cpp
|
||||
action region_block_kokkos.h
|
||||
action verlet_kokkos.cpp
|
||||
|
|
|
@ -0,0 +1,614 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <mpi.h>
|
||||
#include "gridcomm_kokkos.h"
|
||||
#include "comm.h"
|
||||
#include "kspace.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define SWAPDELTA 8
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
GridCommKokkos<DeviceType>::GridCommKokkos(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
|
||||
int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi,
|
||||
int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi,
|
||||
int pxlo, int pxhi, int pylo, int pyhi, int pzlo, int pzhi)
|
||||
: Pointers(lmp)
|
||||
{
|
||||
gridcomm = gcomm;
|
||||
MPI_Comm_rank(gridcomm,&me);
|
||||
|
||||
nforward = forward;
|
||||
nreverse = reverse;
|
||||
|
||||
inxlo = ixlo;
|
||||
inxhi = ixhi;
|
||||
inylo = iylo;
|
||||
inyhi = iyhi;
|
||||
inzlo = izlo;
|
||||
inzhi = izhi;
|
||||
|
||||
outxlo = oxlo;
|
||||
outxhi = oxhi;
|
||||
outylo = oylo;
|
||||
outyhi = oyhi;
|
||||
outzlo = ozlo;
|
||||
outzhi = ozhi;
|
||||
|
||||
outxlo_max = oxlo;
|
||||
outxhi_max = oxhi;
|
||||
outylo_max = oylo;
|
||||
outyhi_max = oyhi;
|
||||
outzlo_max = ozlo;
|
||||
outzhi_max = ozhi;
|
||||
|
||||
procxlo = pxlo;
|
||||
procxhi = pxhi;
|
||||
procylo = pylo;
|
||||
procyhi = pyhi;
|
||||
proczlo = pzlo;
|
||||
proczhi = pzhi;
|
||||
|
||||
nswap = 0;
|
||||
swap = NULL;
|
||||
//buf1 = buf2 = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
GridCommKokkos<DeviceType>::GridCommKokkos(LAMMPS *lmp, MPI_Comm gcomm, int forward, int reverse,
|
||||
int ixlo, int ixhi, int iylo, int iyhi, int izlo, int izhi,
|
||||
int oxlo, int oxhi, int oylo, int oyhi, int ozlo, int ozhi,
|
||||
int oxlo_max, int oxhi_max, int oylo_max, int oyhi_max,
|
||||
int ozlo_max, int ozhi_max,
|
||||
int pxlo, int pxhi, int pylo, int pyhi, int pzlo, int pzhi)
|
||||
: Pointers(lmp)
|
||||
{
|
||||
gridcomm = gcomm;
|
||||
MPI_Comm_rank(gridcomm,&me);
|
||||
|
||||
nforward = forward;
|
||||
nreverse = reverse;
|
||||
|
||||
inxlo = ixlo;
|
||||
inxhi = ixhi;
|
||||
inylo = iylo;
|
||||
inyhi = iyhi;
|
||||
inzlo = izlo;
|
||||
inzhi = izhi;
|
||||
|
||||
outxlo = oxlo;
|
||||
outxhi = oxhi;
|
||||
outylo = oylo;
|
||||
outyhi = oyhi;
|
||||
outzlo = ozlo;
|
||||
outzhi = ozhi;
|
||||
|
||||
outxlo_max = oxlo_max;
|
||||
outxhi_max = oxhi_max;
|
||||
outylo_max = oylo_max;
|
||||
outyhi_max = oyhi_max;
|
||||
outzlo_max = ozlo_max;
|
||||
outzhi_max = ozhi_max;
|
||||
|
||||
procxlo = pxlo;
|
||||
procxhi = pxhi;
|
||||
procylo = pylo;
|
||||
procyhi = pyhi;
|
||||
proczlo = pzlo;
|
||||
proczhi = pzhi;
|
||||
|
||||
nswap = 0;
|
||||
swap = NULL;
|
||||
//buf1 = buf2 = NULL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
GridCommKokkos<DeviceType>::~GridCommKokkos()
|
||||
{
|
||||
for (int i = 0; i < nswap; i++) {
|
||||
//memory->destroy_kokkos(swap[i].k_packlist,swap[i].packlist);
|
||||
//memory->destroy_kokkos(swap[i].k_unpacklist,swap[i].unpacklist);
|
||||
}
|
||||
memory->sfree(swap);
|
||||
|
||||
//memory->destroy(buf1);
|
||||
//memory->destroy(buf2);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
notify 6 neighbor procs how many ghost grid planes I need from them
|
||||
ghostxlo = # of lower grid planes I own that are needed from me
|
||||
by procxlo to become its upper ghost planes
|
||||
ghostxhi = # of upper grid planes I own that are needed from me
|
||||
by procxhi to become its lower ghost planes
|
||||
if no neighbor proc, value is from self
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void GridCommKokkos<DeviceType>::ghost_notify()
|
||||
{
|
||||
int nplanes = inxlo - outxlo;
|
||||
if (procxlo != me)
|
||||
MPI_Sendrecv(&nplanes,1,MPI_INT,procxlo,0,
|
||||
&ghostxhi,1,MPI_INT,procxhi,0,gridcomm,MPI_STATUS_IGNORE);
|
||||
else ghostxhi = nplanes;
|
||||
|
||||
nplanes = outxhi - inxhi;
|
||||
if (procxhi != me)
|
||||
MPI_Sendrecv(&nplanes,1,MPI_INT,procxhi,0,
|
||||
&ghostxlo,1,MPI_INT,procxlo,0,gridcomm,MPI_STATUS_IGNORE);
|
||||
else ghostxlo = nplanes;
|
||||
|
||||
nplanes = inylo - outylo;
|
||||
if (procylo != me)
|
||||
MPI_Sendrecv(&nplanes,1,MPI_INT,procylo,0,
|
||||
&ghostyhi,1,MPI_INT,procyhi,0,gridcomm,MPI_STATUS_IGNORE);
|
||||
else ghostyhi = nplanes;
|
||||
|
||||
nplanes = outyhi - inyhi;
|
||||
if (procyhi != me)
|
||||
MPI_Sendrecv(&nplanes,1,MPI_INT,procyhi,0,
|
||||
&ghostylo,1,MPI_INT,procylo,0,gridcomm,MPI_STATUS_IGNORE);
|
||||
else ghostylo = nplanes;
|
||||
|
||||
nplanes = inzlo - outzlo;
|
||||
if (proczlo != me)
|
||||
MPI_Sendrecv(&nplanes,1,MPI_INT,proczlo,0,
|
||||
&ghostzhi,1,MPI_INT,proczhi,0,gridcomm,MPI_STATUS_IGNORE);
|
||||
else ghostzhi = nplanes;
|
||||
|
||||
nplanes = outzhi - inzhi;
|
||||
if (proczhi != me)
|
||||
MPI_Sendrecv(&nplanes,1,MPI_INT,proczhi,0,
|
||||
&ghostzlo,1,MPI_INT,proczlo,0,gridcomm,MPI_STATUS_IGNORE);
|
||||
else ghostzlo = nplanes;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
check if all ghost grid comm needs overlap into non nearest-neighbor proc
|
||||
if yes, return 1, else return 0
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int GridCommKokkos<DeviceType>::ghost_overlap()
|
||||
{
|
||||
int nearest = 0;
|
||||
if (ghostxlo > inxhi-inxlo+1) nearest = 1;
|
||||
if (ghostxhi > inxhi-inxlo+1) nearest = 1;
|
||||
if (ghostylo > inyhi-inylo+1) nearest = 1;
|
||||
if (ghostyhi > inyhi-inylo+1) nearest = 1;
|
||||
if (ghostzlo > inzhi-inzlo+1) nearest = 1;
|
||||
if (ghostzhi > inzhi-inzlo+1) nearest = 1;
|
||||
|
||||
int nearest_all;
|
||||
MPI_Allreduce(&nearest,&nearest_all,1,MPI_INT,MPI_MIN,gridcomm);
|
||||
|
||||
return nearest_all;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create swap stencil for grid own/ghost communication
|
||||
swaps covers all 3 dimensions and both directions
|
||||
swaps cover multiple iterations in a direction if need grid pts
|
||||
from further away than nearest-neighbor proc
|
||||
same swap list used by forward and reverse communication
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void GridCommKokkos<DeviceType>::setup()
|
||||
{
|
||||
int nsent,sendfirst,sendlast,recvfirst,recvlast;
|
||||
int sendplanes,recvplanes;
|
||||
int notdoneme,notdone;
|
||||
|
||||
int maxswap = 6;
|
||||
swap = (Swap *) memory->smalloc(maxswap*sizeof(Swap),"Commgrid:swap");
|
||||
k_packlist = DAT::tdual_int_2d("Commgrid:packlist",maxswap,1);
|
||||
k_unpacklist = DAT::tdual_int_2d("Commgrid:unpacklist",maxswap,1);
|
||||
nswap = 0;
|
||||
|
||||
// send own grid pts to -x processor, recv ghost grid pts from +x processor
|
||||
|
||||
nsent = 0;
|
||||
sendfirst = inxlo;
|
||||
sendlast = inxhi;
|
||||
recvfirst = inxhi+1;
|
||||
notdone = 1;
|
||||
|
||||
while (notdone) {
|
||||
if (nswap == maxswap) {
|
||||
maxswap += SWAPDELTA;
|
||||
swap = (Swap *)
|
||||
memory->srealloc(swap,maxswap*sizeof(Swap),"Commgrid:swap");
|
||||
k_packlist.resize(maxswap,k_packlist.dimension_1());
|
||||
k_unpacklist.resize(maxswap,k_unpacklist.dimension_1());
|
||||
}
|
||||
|
||||
swap[nswap].sendproc = procxlo;
|
||||
swap[nswap].recvproc = procxhi;
|
||||
sendplanes = MIN(sendlast-sendfirst+1,ghostxlo-nsent);
|
||||
swap[nswap].npack =
|
||||
indices(k_packlist,nswap,
|
||||
sendfirst,sendfirst+sendplanes-1,inylo,inyhi,inzlo,inzhi);
|
||||
|
||||
if (procxlo != me)
|
||||
MPI_Sendrecv(&sendplanes,1,MPI_INT,procxlo,0,
|
||||
&recvplanes,1,MPI_INT,procxhi,0,gridcomm,MPI_STATUS_IGNORE);
|
||||
else recvplanes = sendplanes;
|
||||
|
||||
swap[nswap].nunpack =
|
||||
indices(k_unpacklist,nswap,
|
||||
recvfirst,recvfirst+recvplanes-1,inylo,inyhi,inzlo,inzhi);
|
||||
|
||||
nsent += sendplanes;
|
||||
sendfirst += sendplanes;
|
||||
sendlast += recvplanes;
|
||||
recvfirst += recvplanes;
|
||||
nswap++;
|
||||
|
||||
if (nsent < ghostxlo) notdoneme = 1;
|
||||
else notdoneme = 0;
|
||||
MPI_Allreduce(¬doneme,¬done,1,MPI_INT,MPI_SUM,gridcomm);
|
||||
}
|
||||
|
||||
// send own grid pts to +x processor, recv ghost grid pts from -x processor
|
||||
|
||||
nsent = 0;
|
||||
sendfirst = inxlo;
|
||||
sendlast = inxhi;
|
||||
recvlast = inxlo-1;
|
||||
notdone = 1;
|
||||
|
||||
while (notdone) {
|
||||
if (nswap == maxswap) {
|
||||
maxswap += 1;
|
||||
swap = (Swap *)
|
||||
memory->srealloc(swap,maxswap*sizeof(Swap),"Commgrid:swap");
|
||||
k_packlist.resize(maxswap,k_packlist.dimension_1());
|
||||
k_unpacklist.resize(maxswap,k_unpacklist.dimension_1());
|
||||
}
|
||||
|
||||
swap[nswap].sendproc = procxhi;
|
||||
swap[nswap].recvproc = procxlo;
|
||||
sendplanes = MIN(sendlast-sendfirst+1,ghostxhi-nsent);
|
||||
swap[nswap].npack =
|
||||
indices(k_packlist,nswap,
|
||||
sendlast-sendplanes+1,sendlast,inylo,inyhi,inzlo,inzhi);
|
||||
|
||||
if (procxhi != me)
|
||||
MPI_Sendrecv(&sendplanes,1,MPI_INT,procxhi,0,
|
||||
&recvplanes,1,MPI_INT,procxlo,0,gridcomm,MPI_STATUS_IGNORE);
|
||||
else recvplanes = sendplanes;
|
||||
|
||||
swap[nswap].nunpack =
|
||||
indices(k_unpacklist,nswap,
|
||||
recvlast-recvplanes+1,recvlast,inylo,inyhi,inzlo,inzhi);
|
||||
|
||||
nsent += sendplanes;
|
||||
sendfirst -= recvplanes;
|
||||
sendlast -= sendplanes;
|
||||
recvlast -= recvplanes;
|
||||
nswap++;
|
||||
|
||||
if (nsent < ghostxhi) notdoneme = 1;
|
||||
else notdoneme = 0;
|
||||
MPI_Allreduce(¬doneme,¬done,1,MPI_INT,MPI_SUM,gridcomm);
|
||||
}
|
||||
|
||||
// send own grid pts to -y processor, recv ghost grid pts from +y processor
|
||||
|
||||
nsent = 0;
|
||||
sendfirst = inylo;
|
||||
sendlast = inyhi;
|
||||
recvfirst = inyhi+1;
|
||||
notdone = 1;
|
||||
|
||||
while (notdone) {
|
||||
if (nswap == maxswap) {
|
||||
maxswap += SWAPDELTA;
|
||||
swap = (Swap *)
|
||||
memory->srealloc(swap,maxswap*sizeof(Swap),"Commgrid:swap");
|
||||
k_packlist.resize(maxswap,k_packlist.dimension_1());
|
||||
k_unpacklist.resize(maxswap,k_unpacklist.dimension_1());
|
||||
}
|
||||
|
||||
swap[nswap].sendproc = procylo;
|
||||
swap[nswap].recvproc = procyhi;
|
||||
sendplanes = MIN(sendlast-sendfirst+1,ghostylo-nsent);
|
||||
swap[nswap].npack =
|
||||
indices(k_packlist,nswap,
|
||||
outxlo,outxhi,sendfirst,sendfirst+sendplanes-1,inzlo,inzhi);
|
||||
|
||||
if (procylo != me)
|
||||
MPI_Sendrecv(&sendplanes,1,MPI_INT,procylo,0,
|
||||
&recvplanes,1,MPI_INT,procyhi,0,gridcomm,MPI_STATUS_IGNORE);
|
||||
else recvplanes = sendplanes;
|
||||
|
||||
swap[nswap].nunpack =
|
||||
indices(k_unpacklist,nswap,
|
||||
outxlo,outxhi,recvfirst,recvfirst+recvplanes-1,inzlo,inzhi);
|
||||
|
||||
nsent += sendplanes;
|
||||
sendfirst += sendplanes;
|
||||
sendlast += recvplanes;
|
||||
recvfirst += recvplanes;
|
||||
nswap++;
|
||||
|
||||
if (nsent < ghostylo) notdoneme = 1;
|
||||
else notdoneme = 0;
|
||||
MPI_Allreduce(¬doneme,¬done,1,MPI_INT,MPI_SUM,gridcomm);
|
||||
}
|
||||
|
||||
// send own grid pts to +y processor, recv ghost grid pts from -y processor
|
||||
|
||||
nsent = 0;
|
||||
sendfirst = inylo;
|
||||
sendlast = inyhi;
|
||||
recvlast = inylo-1;
|
||||
notdone = 1;
|
||||
|
||||
while (notdone) {
|
||||
if (nswap == maxswap) {
|
||||
maxswap += 1;
|
||||
swap = (Swap *)
|
||||
memory->srealloc(swap,maxswap*sizeof(Swap),"Commgrid:swap");
|
||||
k_packlist.resize(maxswap,k_packlist.dimension_1());
|
||||
k_unpacklist.resize(maxswap,k_unpacklist.dimension_1());
|
||||
}
|
||||
|
||||
swap[nswap].sendproc = procyhi;
|
||||
swap[nswap].recvproc = procylo;
|
||||
sendplanes = MIN(sendlast-sendfirst+1,ghostyhi-nsent);
|
||||
swap[nswap].npack =
|
||||
indices(k_packlist,nswap,
|
||||
outxlo,outxhi,sendlast-sendplanes+1,sendlast,inzlo,inzhi);
|
||||
|
||||
if (procyhi != me)
|
||||
MPI_Sendrecv(&sendplanes,1,MPI_INT,procyhi,0,
|
||||
&recvplanes,1,MPI_INT,procylo,0,gridcomm,MPI_STATUS_IGNORE);
|
||||
else recvplanes = sendplanes;
|
||||
|
||||
swap[nswap].nunpack =
|
||||
indices(k_unpacklist,nswap,
|
||||
outxlo,outxhi,recvlast-recvplanes+1,recvlast,inzlo,inzhi);
|
||||
|
||||
nsent += sendplanes;
|
||||
sendfirst -= recvplanes;
|
||||
sendlast -= sendplanes;
|
||||
recvlast -= recvplanes;
|
||||
nswap++;
|
||||
|
||||
if (nsent < ghostyhi) notdoneme = 1;
|
||||
else notdoneme = 0;
|
||||
MPI_Allreduce(¬doneme,¬done,1,MPI_INT,MPI_SUM,gridcomm);
|
||||
}
|
||||
|
||||
// send own grid pts to -z processor, recv ghost grid pts from +z processor
|
||||
|
||||
nsent = 0;
|
||||
sendfirst = inzlo;
|
||||
sendlast = inzhi;
|
||||
recvfirst = inzhi+1;
|
||||
notdone = 1;
|
||||
|
||||
while (notdone) {
|
||||
if (nswap == maxswap) {
|
||||
maxswap += SWAPDELTA;
|
||||
swap = (Swap *)
|
||||
memory->srealloc(swap,maxswap*sizeof(Swap),"Commgrid:swap");
|
||||
k_packlist.resize(maxswap,k_packlist.dimension_1());
|
||||
k_unpacklist.resize(maxswap,k_unpacklist.dimension_1());
|
||||
}
|
||||
|
||||
swap[nswap].sendproc = proczlo;
|
||||
swap[nswap].recvproc = proczhi;
|
||||
sendplanes = MIN(sendlast-sendfirst+1,ghostzlo-nsent);
|
||||
swap[nswap].npack =
|
||||
indices(k_packlist,nswap,
|
||||
outxlo,outxhi,outylo,outyhi,sendfirst,sendfirst+sendplanes-1);
|
||||
|
||||
if (proczlo != me)
|
||||
MPI_Sendrecv(&sendplanes,1,MPI_INT,proczlo,0,
|
||||
&recvplanes,1,MPI_INT,proczhi,0,gridcomm,MPI_STATUS_IGNORE);
|
||||
else recvplanes = sendplanes;
|
||||
|
||||
swap[nswap].nunpack =
|
||||
indices(k_unpacklist,nswap,
|
||||
outxlo,outxhi,outylo,outyhi,recvfirst,recvfirst+recvplanes-1);
|
||||
|
||||
nsent += sendplanes;
|
||||
sendfirst += sendplanes;
|
||||
sendlast += recvplanes;
|
||||
recvfirst += recvplanes;
|
||||
nswap++;
|
||||
|
||||
if (nsent < ghostzlo) notdoneme = 1;
|
||||
else notdoneme = 0;
|
||||
MPI_Allreduce(¬doneme,¬done,1,MPI_INT,MPI_SUM,gridcomm);
|
||||
}
|
||||
|
||||
// send own grid pts to +z processor, recv ghost grid pts from -z processor
|
||||
|
||||
nsent = 0;
|
||||
sendfirst = inzlo;
|
||||
sendlast = inzhi;
|
||||
recvlast = inzlo-1;
|
||||
notdone = 1;
|
||||
|
||||
while (notdone) {
|
||||
if (nswap == maxswap) {
|
||||
maxswap += 1;
|
||||
swap = (Swap *)
|
||||
memory->srealloc(swap,maxswap*sizeof(Swap),"Commgrid:swap");
|
||||
k_packlist.resize(maxswap,k_packlist.dimension_1());
|
||||
k_unpacklist.resize(maxswap,k_unpacklist.dimension_1());
|
||||
}
|
||||
|
||||
swap[nswap].sendproc = proczhi;
|
||||
swap[nswap].recvproc = proczlo;
|
||||
sendplanes = MIN(sendlast-sendfirst+1,ghostzhi-nsent);
|
||||
swap[nswap].npack =
|
||||
indices(k_packlist,nswap,
|
||||
outxlo,outxhi,outylo,outyhi,sendlast-sendplanes+1,sendlast);
|
||||
|
||||
if (proczhi != me)
|
||||
MPI_Sendrecv(&sendplanes,1,MPI_INT,proczhi,0,
|
||||
&recvplanes,1,MPI_INT,proczlo,0,gridcomm,MPI_STATUS_IGNORE);
|
||||
else recvplanes = sendplanes;
|
||||
|
||||
swap[nswap].nunpack =
|
||||
indices(k_unpacklist,nswap,
|
||||
outxlo,outxhi,outylo,outyhi,recvlast-recvplanes+1,recvlast);
|
||||
|
||||
nsent += sendplanes;
|
||||
sendfirst -= recvplanes;
|
||||
sendlast -= sendplanes;
|
||||
recvlast -= recvplanes;
|
||||
nswap++;
|
||||
|
||||
if (nsent < ghostzhi) notdoneme = 1;
|
||||
else notdoneme = 0;
|
||||
MPI_Allreduce(¬doneme,¬done,1,MPI_INT,MPI_SUM,gridcomm);
|
||||
}
|
||||
|
||||
// nbuf = max of any forward/reverse pack/unpack
|
||||
|
||||
nbuf = 0;
|
||||
for (int i = 0; i < nswap; i++) {
|
||||
nbuf = MAX(nbuf,swap[i].npack);
|
||||
nbuf = MAX(nbuf,swap[i].nunpack);
|
||||
}
|
||||
nbuf *= MAX(nforward,nreverse);
|
||||
//memory->create(buf1,nbuf,"Commgrid:buf1");
|
||||
k_buf1 = DAT::tdual_FFT_SCALAR_1d("Commgrid:buf1",nbuf);
|
||||
//memory->create(buf2,nbuf,"Commgrid:buf2");
|
||||
k_buf2 = DAT::tdual_FFT_SCALAR_1d("Commgrid:buf2",nbuf);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
use swap list in forward order to acquire copy of all needed ghost grid pts
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which)
|
||||
{
|
||||
k_packlist.sync<DeviceType>();
|
||||
k_unpacklist.sync<DeviceType>();
|
||||
|
||||
for (int m = 0; m < nswap; m++) {
|
||||
if (swap[m].sendproc == me)
|
||||
kspace->pack_forward_kokkos(which,k_buf2,swap[m].npack,k_packlist,m);
|
||||
else
|
||||
kspace->pack_forward_kokkos(which,k_buf1,swap[m].npack,k_packlist,m);
|
||||
|
||||
if (swap[m].sendproc != me) {
|
||||
MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nforward*swap[m].nunpack,MPI_FFT_SCALAR,
|
||||
swap[m].recvproc,0,gridcomm,&request);
|
||||
MPI_Send(k_buf1.view<DeviceType>().ptr_on_device(),nforward*swap[m].npack,MPI_FFT_SCALAR,
|
||||
swap[m].sendproc,0,gridcomm);
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
}
|
||||
|
||||
kspace->unpack_forward_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
use swap list in reverse order to compute fully summed value
|
||||
for each owned grid pt that some other proc has copy of as a ghost grid pt
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which)
|
||||
{
|
||||
k_packlist.sync<DeviceType>();
|
||||
k_unpacklist.sync<DeviceType>();
|
||||
|
||||
for (int m = nswap-1; m >= 0; m--) {
|
||||
if (swap[m].recvproc == me)
|
||||
kspace->pack_reverse_kokkos(which,k_buf2,swap[m].nunpack,k_unpacklist,m);
|
||||
else
|
||||
kspace->pack_reverse_kokkos(which,k_buf1,swap[m].nunpack,k_unpacklist,m);
|
||||
|
||||
if (swap[m].recvproc != me) {
|
||||
MPI_Irecv(k_buf2.view<DeviceType>().ptr_on_device(),nreverse*swap[m].npack,MPI_FFT_SCALAR,
|
||||
swap[m].sendproc,0,gridcomm,&request);
|
||||
MPI_Send(k_buf1.view<DeviceType>().ptr_on_device(),nreverse*swap[m].nunpack,MPI_FFT_SCALAR,
|
||||
swap[m].recvproc,0,gridcomm);
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
}
|
||||
|
||||
kspace->unpack_reverse_kokkos(which,k_buf2,swap[m].npack,k_packlist,m);
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
create 1d list of offsets into 3d array section (xlo:xhi,ylo:yhi,zlo:zhi)
|
||||
assume 3d array is allocated as (0:outxhi_max-outxlo_max+1,0:outyhi_max-outylo_max+1,
|
||||
0:outzhi_max-outzlo_max+1)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
int GridCommKokkos<DeviceType>::indices(DAT::tdual_int_2d &k_list, int index,
|
||||
int xlo, int xhi, int ylo, int yhi, int zlo, int zhi)
|
||||
{
|
||||
int nmax = (xhi-xlo+1) * (yhi-ylo+1) * (zhi-zlo+1);
|
||||
if (k_list.dimension_1() < nmax)
|
||||
k_list.resize(k_list.dimension_0(),nmax);
|
||||
|
||||
int nx = (outxhi_max-outxlo_max+1);
|
||||
int ny = (outyhi_max-outylo_max+1);
|
||||
|
||||
k_list.sync<LMPHostType>();
|
||||
|
||||
int n = 0;
|
||||
int ix,iy,iz;
|
||||
for (iz = zlo; iz <= zhi; iz++)
|
||||
for (iy = ylo; iy <= yhi; iy++)
|
||||
for (ix = xlo; ix <= xhi; ix++)
|
||||
k_list.h_view(index,n++) = (iz-outzlo_max)*ny*nx + (iy-outylo_max)*nx + (ix-outxlo_max);
|
||||
|
||||
k_list.modify<LMPHostType>();
|
||||
k_list.sync<DeviceType>();
|
||||
|
||||
return nmax;
|
||||
}
|
||||
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
memory usage of send/recv bufs
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
double GridCommKokkos<DeviceType>::memory_usage()
|
||||
{
|
||||
double bytes = 2*nbuf * sizeof(double);
|
||||
return bytes;
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
template class GridCommKokkos<LMPDeviceType>;
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
template class GridCommKokkos<LMPHostType>;
|
||||
#endif
|
||||
}
|
|
@ -0,0 +1,96 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_GRIDCOMM_KOKKOS_H
|
||||
#define LMP_GRIDCOMM_KOKKOS_H
|
||||
|
||||
#include "pointers.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
#ifdef FFT_SINGLE
|
||||
typedef float FFT_SCALAR;
|
||||
#define MPI_FFT_SCALAR MPI_FLOAT
|
||||
#else
|
||||
typedef double FFT_SCALAR;
|
||||
#define MPI_FFT_SCALAR MPI_DOUBLE
|
||||
#endif
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
template<class DeviceType>
|
||||
class GridCommKokkos : protected Pointers {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
GridCommKokkos(class LAMMPS *, MPI_Comm, int, int,
|
||||
int, int, int, int, int, int,
|
||||
int, int, int, int, int, int,
|
||||
int, int, int, int, int, int);
|
||||
GridCommKokkos(class LAMMPS *, MPI_Comm, int, int,
|
||||
int, int, int, int, int, int,
|
||||
int, int, int, int, int, int,
|
||||
int, int, int, int, int, int,
|
||||
int, int, int, int, int, int);
|
||||
~GridCommKokkos();
|
||||
void ghost_notify();
|
||||
int ghost_overlap();
|
||||
void setup();
|
||||
void forward_comm(class KSpace *, int);
|
||||
void reverse_comm(class KSpace *, int);
|
||||
double memory_usage();
|
||||
|
||||
private:
|
||||
int me;
|
||||
int nforward,nreverse;
|
||||
MPI_Comm gridcomm;
|
||||
MPI_Request request;
|
||||
|
||||
// in = inclusive indices of 3d grid chunk that I own
|
||||
// out = inclusive indices of 3d grid chunk I own plus ghosts I use
|
||||
// proc = 6 neighbor procs that surround me
|
||||
// ghost = # of my owned grid planes needed from me
|
||||
// by each of 6 neighbor procs to become their ghost planes
|
||||
|
||||
int inxlo,inxhi,inylo,inyhi,inzlo,inzhi;
|
||||
int outxlo,outxhi,outylo,outyhi,outzlo,outzhi;
|
||||
int outxlo_max,outxhi_max,outylo_max,outyhi_max,outzlo_max,outzhi_max;
|
||||
int procxlo,procxhi,procylo,procyhi,proczlo,proczhi;
|
||||
int ghostxlo,ghostxhi,ghostylo,ghostyhi,ghostzlo,ghostzhi;
|
||||
|
||||
int nbuf;
|
||||
//FFT_SCALAR *buf1,*buf2;
|
||||
DAT::tdual_FFT_SCALAR_1d k_buf1;
|
||||
DAT::tdual_FFT_SCALAR_1d k_buf2;
|
||||
|
||||
struct Swap {
|
||||
int sendproc; // proc to send to for forward comm
|
||||
int recvproc; // proc to recv from for forward comm
|
||||
int npack; // # of datums to pack
|
||||
int nunpack; // # of datums to unpack
|
||||
//int *packlist; // 3d array offsets to pack
|
||||
//int *unpacklist; // 3d array offsets to unpack
|
||||
};
|
||||
|
||||
DAT::tdual_int_2d k_packlist;
|
||||
DAT::tdual_int_2d k_unpacklist;
|
||||
|
||||
int nswap;
|
||||
Swap *swap;
|
||||
|
||||
int indices(DAT::tdual_int_2d &, int, int, int, int, int, int, int);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
|
@ -14,6 +14,8 @@
|
|||
#ifndef LMP_LMPTYPE_KOKKOS_H
|
||||
#define LMP_LMPTYPE_KOKKOS_H
|
||||
|
||||
#include "lmptype.h"
|
||||
|
||||
#include <Kokkos_Core.hpp>
|
||||
#include <Kokkos_DualView.hpp>
|
||||
#include <impl/Kokkos_Timer.hpp>
|
||||
|
@ -24,6 +26,21 @@
|
|||
#define ISFINITE(x) std::isfinite(x)
|
||||
#endif
|
||||
|
||||
// User-settable FFT precision
|
||||
|
||||
// FFT_PRECISION = 1 is single-precision complex (4-byte real, 4-byte imag)
|
||||
// FFT_PRECISION = 2 is double-precision complex (8-byte real, 8-byte imag)
|
||||
|
||||
#ifdef FFT_SINGLE
|
||||
#define FFT_PRECISION 1
|
||||
#define MPI_FFT_SCALAR MPI_FLOAT
|
||||
typedef float FFT_SCALAR;
|
||||
#else
|
||||
#define FFT_PRECISION 2
|
||||
#define MPI_FFT_SCALAR MPI_DOUBLE
|
||||
typedef double FFT_SCALAR;
|
||||
#endif
|
||||
|
||||
#define MAX_TYPES_STACKPARAMS 12
|
||||
#define NeighClusterSize 8
|
||||
|
||||
|
@ -567,6 +584,32 @@ typedef tdual_neighbors_2d::t_dev_um t_neighbors_2d_um;
|
|||
typedef tdual_neighbors_2d::t_dev_const_um t_neighbors_2d_const_um;
|
||||
typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread;
|
||||
|
||||
//Kspace
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<FFT_SCALAR*, Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_SCALAR_1d;
|
||||
typedef tdual_FFT_SCALAR_1d::t_dev t_FFT_SCALAR_1d;
|
||||
typedef tdual_FFT_SCALAR_1d::t_dev_um t_FFT_SCALAR_1d_um;
|
||||
|
||||
typedef Kokkos::DualView<FFT_SCALAR**,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d;
|
||||
typedef tdual_FFT_SCALAR_2d::t_dev t_FFT_SCALAR_2d;
|
||||
|
||||
typedef Kokkos::DualView<FFT_SCALAR**[3],Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d_3;
|
||||
typedef tdual_FFT_SCALAR_2d_3::t_dev t_FFT_SCALAR_2d_3;
|
||||
|
||||
typedef Kokkos::DualView<FFT_SCALAR***,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_3d;
|
||||
typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<FFT_SCALAR*[2], Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_DATA_1d;
|
||||
typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d;
|
||||
typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_64;
|
||||
typedef tdual_int_64::t_dev t_int_64;
|
||||
typedef tdual_int_64::t_dev_um t_int_64_um;
|
||||
|
||||
};
|
||||
|
||||
#ifdef KOKKOS_HAVE_CUDA
|
||||
|
@ -801,6 +844,33 @@ typedef tdual_neighbors_2d::t_host_um t_neighbors_2d_um;
|
|||
typedef tdual_neighbors_2d::t_host_const_um t_neighbors_2d_const_um;
|
||||
typedef tdual_neighbors_2d::t_host_const_randomread t_neighbors_2d_randomread;
|
||||
|
||||
|
||||
//Kspace
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<FFT_SCALAR*, Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_SCALAR_1d;
|
||||
typedef tdual_FFT_SCALAR_1d::t_host t_FFT_SCALAR_1d;
|
||||
typedef tdual_FFT_SCALAR_1d::t_host_um t_FFT_SCALAR_1d_um;
|
||||
|
||||
typedef Kokkos::DualView<FFT_SCALAR**,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d;
|
||||
typedef tdual_FFT_SCALAR_2d::t_host t_FFT_SCALAR_2d;
|
||||
|
||||
typedef Kokkos::DualView<FFT_SCALAR**[3],Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d_3;
|
||||
typedef tdual_FFT_SCALAR_2d_3::t_host t_FFT_SCALAR_2d_3;
|
||||
|
||||
typedef Kokkos::DualView<FFT_SCALAR***,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_3d;
|
||||
typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<FFT_SCALAR*[2], Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_DATA_1d;
|
||||
typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d;
|
||||
typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um;
|
||||
|
||||
typedef Kokkos::
|
||||
DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_64;
|
||||
typedef tdual_int_64::t_host t_int_64;
|
||||
typedef tdual_int_64::t_host_um t_int_64_um;
|
||||
|
||||
};
|
||||
#endif
|
||||
//default LAMMPS Types
|
||||
|
|
|
@ -0,0 +1,534 @@
|
|||
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include "math_special_kokkos.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
/* Library libcerf:
|
||||
* Compute complex error functions, based on a new implementation of
|
||||
* Faddeeva's w_of_z. Also provide Dawson and Voigt functions.
|
||||
*
|
||||
* File erfcx.c:
|
||||
* Compute erfcx(x) = exp(x^2) erfc(x) function, for real x,
|
||||
* using a novel algorithm that is much faster than DERFC of SLATEC.
|
||||
* This function is used in the computation of Faddeeva, Dawson, and
|
||||
* other complex error functions.
|
||||
*
|
||||
* Copyright:
|
||||
* (C) 2012 Massachusetts Institute of Technology
|
||||
* (C) 2013 Forschungszentrum Jülich GmbH
|
||||
*
|
||||
* Licence:
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
||||
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
||||
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Steven G. Johnson, Massachusetts Institute of Technology, 2012, core author
|
||||
* Joachim Wuttke, Forschungszentrum Jülich, 2013, package maintainer
|
||||
*
|
||||
* Website:
|
||||
* http://apps.jcns.fz-juelich.de/libcerf
|
||||
*
|
||||
* Revision history:
|
||||
* ../CHANGELOG
|
||||
*
|
||||
* Manual page:
|
||||
* man 3 erfcx
|
||||
*/
|
||||
|
||||
/******************************************************************************/
|
||||
/* Lookup-table for Chebyshev polynomials for smaller |x| */
|
||||
/******************************************************************************/
|
||||
|
||||
double MathSpecialKokkos::erfcx_y100(const double y100)
|
||||
{
|
||||
// Steven G. Johnson, October 2012.
|
||||
|
||||
// Given y100=100*y, where y = 4/(4+x) for x >= 0, compute erfc(x).
|
||||
|
||||
// Uses a look-up table of 100 different Chebyshev polynomials
|
||||
// for y intervals [0,0.01], [0.01,0.02], ...., [0.99,1], generated
|
||||
// with the help of Maple and a little shell script. This allows
|
||||
// the Chebyshev polynomials to be of significantly lower degree (about 1/4)
|
||||
// compared to fitting the whole [0,1] interval with a single polynomial.
|
||||
|
||||
switch ((int) y100) {
|
||||
case 0: {
|
||||
double t = 2*y100 - 1;
|
||||
return 0.70878032454106438663e-3 + (0.71234091047026302958e-3 + (0.35779077297597742384e-5 + (0.17403143962587937815e-7 + (0.81710660047307788845e-10 + (0.36885022360434957634e-12 + 0.15917038551111111111e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 1: {
|
||||
double t = 2*y100 - 3;
|
||||
return 0.21479143208285144230e-2 + (0.72686402367379996033e-3 + (0.36843175430938995552e-5 + (0.18071841272149201685e-7 + (0.85496449296040325555e-10 + (0.38852037518534291510e-12 + 0.16868473576888888889e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 2: {
|
||||
double t = 2*y100 - 5;
|
||||
return 0.36165255935630175090e-2 + (0.74182092323555510862e-3 + (0.37948319957528242260e-5 + (0.18771627021793087350e-7 + (0.89484715122415089123e-10 + (0.40935858517772440862e-12 + 0.17872061464888888889e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 3: {
|
||||
double t = 2*y100 - 7;
|
||||
return 0.51154983860031979264e-2 + (0.75722840734791660540e-3 + (0.39096425726735703941e-5 + (0.19504168704300468210e-7 + (0.93687503063178993915e-10 + (0.43143925959079664747e-12 + 0.18939926435555555556e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 4: {
|
||||
double t = 2*y100 - 9;
|
||||
return 0.66457513172673049824e-2 + (0.77310406054447454920e-3 + (0.40289510589399439385e-5 + (0.20271233238288381092e-7 + (0.98117631321709100264e-10 + (0.45484207406017752971e-12 + 0.20076352213333333333e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 5: {
|
||||
double t = 2*y100 - 11;
|
||||
return 0.82082389970241207883e-2 + (0.78946629611881710721e-3 + (0.41529701552622656574e-5 + (0.21074693344544655714e-7 + (0.10278874108587317989e-9 + (0.47965201390613339638e-12 + 0.21285907413333333333e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 6: {
|
||||
double t = 2*y100 - 13;
|
||||
return 0.98039537275352193165e-2 + (0.80633440108342840956e-3 + (0.42819241329736982942e-5 + (0.21916534346907168612e-7 + (0.10771535136565470914e-9 + (0.50595972623692822410e-12 + 0.22573462684444444444e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 7: {
|
||||
double t = 2*y100 - 15;
|
||||
return 0.11433927298290302370e-1 + (0.82372858383196561209e-3 + (0.44160495311765438816e-5 + (0.22798861426211986056e-7 + (0.11291291745879239736e-9 + (0.53386189365816880454e-12 + 0.23944209546666666667e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 8: {
|
||||
double t = 2*y100 - 17;
|
||||
return 0.13099232878814653979e-1 + (0.84167002467906968214e-3 + (0.45555958988457506002e-5 + (0.23723907357214175198e-7 + (0.11839789326602695603e-9 + (0.56346163067550237877e-12 + 0.25403679644444444444e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 9: {
|
||||
double t = 2*y100 - 19;
|
||||
return 0.14800987015587535621e-1 + (0.86018092946345943214e-3 + (0.47008265848816866105e-5 + (0.24694040760197315333e-7 + (0.12418779768752299093e-9 + (0.59486890370320261949e-12 + 0.26957764568888888889e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 10: {
|
||||
double t = 2*y100 - 21;
|
||||
return 0.16540351739394069380e-1 + (0.87928458641241463952e-3 + (0.48520195793001753903e-5 + (0.25711774900881709176e-7 + (0.13030128534230822419e-9 + (0.62820097586874779402e-12 + 0.28612737351111111111e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 11: {
|
||||
double t = 2*y100 - 23;
|
||||
return 0.18318536789842392647e-1 + (0.89900542647891721692e-3 + (0.50094684089553365810e-5 + (0.26779777074218070482e-7 + (0.13675822186304615566e-9 + (0.66358287745352705725e-12 + 0.30375273884444444444e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 12: {
|
||||
double t = 2*y100 - 25;
|
||||
return 0.20136801964214276775e-1 + (0.91936908737673676012e-3 + (0.51734830914104276820e-5 + (0.27900878609710432673e-7 + (0.14357976402809042257e-9 + (0.70114790311043728387e-12 + 0.32252476000000000000e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 13: {
|
||||
double t = 2*y100 - 27;
|
||||
return 0.21996459598282740954e-1 + (0.94040248155366777784e-3 + (0.53443911508041164739e-5 + (0.29078085538049374673e-7 + (0.15078844500329731137e-9 + (0.74103813647499204269e-12 + 0.34251892320000000000e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 14: {
|
||||
double t = 2*y100 - 29;
|
||||
return 0.23898877187226319502e-1 + (0.96213386835900177540e-3 + (0.55225386998049012752e-5 + (0.30314589961047687059e-7 + (0.15840826497296335264e-9 + (0.78340500472414454395e-12 + 0.36381553564444444445e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 15: {
|
||||
double t = 2*y100 - 31;
|
||||
return 0.25845480155298518485e-1 + (0.98459293067820123389e-3 + (0.57082915920051843672e-5 + (0.31613782169164830118e-7 + (0.16646478745529630813e-9 + (0.82840985928785407942e-12 + 0.38649975768888888890e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 16: {
|
||||
double t = 2*y100 - 33;
|
||||
return 0.27837754783474696598e-1 + (0.10078108563256892757e-2 + (0.59020366493792212221e-5 + (0.32979263553246520417e-7 + (0.17498524159268458073e-9 + (0.87622459124842525110e-12 + 0.41066206488888888890e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 17: {
|
||||
double t = 2*y100 - 35;
|
||||
return 0.29877251304899307550e-1 + (0.10318204245057349310e-2 + (0.61041829697162055093e-5 + (0.34414860359542720579e-7 + (0.18399863072934089607e-9 + (0.92703227366365046533e-12 + 0.43639844053333333334e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 18: {
|
||||
double t = 2*y100 - 37;
|
||||
return 0.31965587178596443475e-1 + (0.10566560976716574401e-2 + (0.63151633192414586770e-5 + (0.35924638339521924242e-7 + (0.19353584758781174038e-9 + (0.98102783859889264382e-12 + 0.46381060817777777779e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 19: {
|
||||
double t = 2*y100 - 39;
|
||||
return 0.34104450552588334840e-1 + (0.10823541191350532574e-2 + (0.65354356159553934436e-5 + (0.37512918348533521149e-7 + (0.20362979635817883229e-9 + (0.10384187833037282363e-11 + 0.49300625262222222221e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 20: {
|
||||
double t = 2*y100 - 41;
|
||||
return 0.36295603928292425716e-1 + (0.11089526167995268200e-2 + (0.67654845095518363577e-5 + (0.39184292949913591646e-7 + (0.21431552202133775150e-9 + (0.10994259106646731797e-11 + 0.52409949102222222221e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 21: {
|
||||
double t = 2*y100 - 43;
|
||||
return 0.38540888038840509795e-1 + (0.11364917134175420009e-2 + (0.70058230641246312003e-5 + (0.40943644083718586939e-7 + (0.22563034723692881631e-9 + (0.11642841011361992885e-11 + 0.55721092871111111110e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 22: {
|
||||
double t = 2*y100 - 45;
|
||||
return 0.40842225954785960651e-1 + (0.11650136437945673891e-2 + (0.72569945502343006619e-5 + (0.42796161861855042273e-7 + (0.23761401711005024162e-9 + (0.12332431172381557035e-11 + 0.59246802364444444445e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 23: {
|
||||
double t = 2*y100 - 47;
|
||||
return 0.43201627431540222422e-1 + (0.11945628793917272199e-2 + (0.75195743532849206263e-5 + (0.44747364553960993492e-7 + (0.25030885216472953674e-9 + (0.13065684400300476484e-11 + 0.63000532853333333334e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 24: {
|
||||
double t = 2*y100 - 49;
|
||||
return 0.45621193513810471438e-1 + (0.12251862608067529503e-2 + (0.77941720055551920319e-5 + (0.46803119830954460212e-7 + (0.26375990983978426273e-9 + (0.13845421370977119765e-11 + 0.66996477404444444445e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 25: {
|
||||
double t = 2*y100 - 51;
|
||||
return 0.48103121413299865517e-1 + (0.12569331386432195113e-2 + (0.80814333496367673980e-5 + (0.48969667335682018324e-7 + (0.27801515481905748484e-9 + (0.14674637611609884208e-11 + 0.71249589351111111110e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 26: {
|
||||
double t = 2*y100 - 53;
|
||||
return 0.50649709676983338501e-1 + (0.12898555233099055810e-2 + (0.83820428414568799654e-5 + (0.51253642652551838659e-7 + (0.29312563849675507232e-9 + (0.15556512782814827846e-11 + 0.75775607822222222221e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 27: {
|
||||
double t = 2*y100 - 55;
|
||||
return 0.53263363664388864181e-1 + (0.13240082443256975769e-2 + (0.86967260015007658418e-5 + (0.53662102750396795566e-7 + (0.30914568786634796807e-9 + (0.16494420240828493176e-11 + 0.80591079644444444445e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 28: {
|
||||
double t = 2*y100 - 57;
|
||||
return 0.55946601353500013794e-1 + (0.13594491197408190706e-2 + (0.90262520233016380987e-5 + (0.56202552975056695376e-7 + (0.32613310410503135996e-9 + (0.17491936862246367398e-11 + 0.85713381688888888890e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 29: {
|
||||
double t = 2*y100 - 59;
|
||||
return 0.58702059496154081813e-1 + (0.13962391363223647892e-2 + (0.93714365487312784270e-5 + (0.58882975670265286526e-7 + (0.34414937110591753387e-9 + (0.18552853109751857859e-11 + 0.91160736711111111110e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 30: {
|
||||
double t = 2*y100 - 61;
|
||||
return 0.61532500145144778048e-1 + (0.14344426411912015247e-2 + (0.97331446201016809696e-5 + (0.61711860507347175097e-7 + (0.36325987418295300221e-9 + (0.19681183310134518232e-11 + 0.96952238400000000000e-14 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 31: {
|
||||
double t = 2*y100 - 63;
|
||||
return 0.64440817576653297993e-1 + (0.14741275456383131151e-2 + (0.10112293819576437838e-4 + (0.64698236605933246196e-7 + (0.38353412915303665586e-9 + (0.20881176114385120186e-11 + 0.10310784480000000000e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 32: {
|
||||
double t = 2*y100 - 65;
|
||||
return 0.67430045633130393282e-1 + (0.15153655418916540370e-2 + (0.10509857606888328667e-4 + (0.67851706529363332855e-7 + (0.40504602194811140006e-9 + (0.22157325110542534469e-11 + 0.10964842115555555556e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 33: {
|
||||
double t = 2*y100 - 67;
|
||||
return 0.70503365513338850709e-1 + (0.15582323336495709827e-2 + (0.10926868866865231089e-4 + (0.71182482239613507542e-7 + (0.42787405890153386710e-9 + (0.23514379522274416437e-11 + 0.11659571751111111111e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 34: {
|
||||
double t = 2*y100 - 69;
|
||||
return 0.73664114037944596353e-1 + (0.16028078812438820413e-2 + (0.11364423678778207991e-4 + (0.74701423097423182009e-7 + (0.45210162777476488324e-9 + (0.24957355004088569134e-11 + 0.12397238257777777778e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 35: {
|
||||
double t = 2*y100 - 71;
|
||||
return 0.76915792420819562379e-1 + (0.16491766623447889354e-2 + (0.11823685320041302169e-4 + (0.78420075993781544386e-7 + (0.47781726956916478925e-9 + (0.26491544403815724749e-11 + 0.13180196462222222222e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 36: {
|
||||
double t = 2*y100 - 73;
|
||||
return 0.80262075578094612819e-1 + (0.16974279491709504117e-2 + (0.12305888517309891674e-4 + (0.82350717698979042290e-7 + (0.50511496109857113929e-9 + (0.28122528497626897696e-11 + 0.14010889635555555556e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 37: {
|
||||
double t = 2*y100 - 75;
|
||||
return 0.83706822008980357446e-1 + (0.17476561032212656962e-2 + (0.12812343958540763368e-4 + (0.86506399515036435592e-7 + (0.53409440823869467453e-9 + (0.29856186620887555043e-11 + 0.14891851591111111111e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 38: {
|
||||
double t = 2*y100 - 77;
|
||||
return 0.87254084284461718231e-1 + (0.17999608886001962327e-2 + (0.13344443080089492218e-4 + (0.90900994316429008631e-7 + (0.56486134972616465316e-9 + (0.31698707080033956934e-11 + 0.15825697795555555556e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 39: {
|
||||
double t = 2*y100 - 79;
|
||||
return 0.90908120182172748487e-1 + (0.18544478050657699758e-2 + (0.13903663143426120077e-4 + (0.95549246062549906177e-7 + (0.59752787125242054315e-9 + (0.33656597366099099413e-11 + 0.16815130613333333333e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 40: {
|
||||
double t = 2*y100 - 81;
|
||||
return 0.94673404508075481121e-1 + (0.19112284419887303347e-2 + (0.14491572616545004930e-4 + (0.10046682186333613697e-6 + (0.63221272959791000515e-9 + (0.35736693975589130818e-11 + 0.17862931591111111111e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 41: {
|
||||
double t = 2*y100 - 83;
|
||||
return 0.98554641648004456555e-1 + (0.19704208544725622126e-2 + (0.15109836875625443935e-4 + (0.10567036667675984067e-6 + (0.66904168640019354565e-9 + (0.37946171850824333014e-11 + 0.18971959040000000000e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 42: {
|
||||
double t = 2*y100 - 85;
|
||||
return 0.10255677889470089531e0 + (0.20321499629472857418e-2 + (0.15760224242962179564e-4 + (0.11117756071353507391e-6 + (0.70814785110097658502e-9 + (0.40292553276632563925e-11 + 0.20145143075555555556e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 43: {
|
||||
double t = 2*y100 - 87;
|
||||
return 0.10668502059865093318e0 + (0.20965479776148731610e-2 + (0.16444612377624983565e-4 + (0.11700717962026152749e-6 + (0.74967203250938418991e-9 + (0.42783716186085922176e-11 + 0.21385479360000000000e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 44: {
|
||||
double t = 2*y100 - 89;
|
||||
return 0.11094484319386444474e0 + (0.21637548491908170841e-2 + (0.17164995035719657111e-4 + (0.12317915750735938089e-6 + (0.79376309831499633734e-9 + (0.45427901763106353914e-11 + 0.22696025653333333333e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 45: {
|
||||
double t = 2*y100 - 91;
|
||||
return 0.11534201115268804714e0 + (0.22339187474546420375e-2 + (0.17923489217504226813e-4 + (0.12971465288245997681e-6 + (0.84057834180389073587e-9 + (0.48233721206418027227e-11 + 0.24079890062222222222e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 46: {
|
||||
double t = 2*y100 - 93;
|
||||
return 0.11988259392684094740e0 + (0.23071965691918689601e-2 + (0.18722342718958935446e-4 + (0.13663611754337957520e-6 + (0.89028385488493287005e-9 + (0.51210161569225846701e-11 + 0.25540227111111111111e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 47: {
|
||||
double t = 2*y100 - 95;
|
||||
return 0.12457298393509812907e0 + (0.23837544771809575380e-2 + (0.19563942105711612475e-4 + (0.14396736847739470782e-6 + (0.94305490646459247016e-9 + (0.54366590583134218096e-11 + 0.27080225920000000000e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 48: {
|
||||
double t = 2*y100 - 97;
|
||||
return 0.12941991566142438816e0 + (0.24637684719508859484e-2 + (0.20450821127475879816e-4 + (0.15173366280523906622e-6 + (0.99907632506389027739e-9 + (0.57712760311351625221e-11 + 0.28703099555555555556e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 49: {
|
||||
double t = 2*y100 - 99;
|
||||
return 0.13443048593088696613e0 + (0.25474249981080823877e-2 + (0.21385669591362915223e-4 + (0.15996177579900443030e-6 + (0.10585428844575134013e-8 + (0.61258809536787882989e-11 + 0.30412080142222222222e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 50: {
|
||||
double t = 2*y100 - 101;
|
||||
return 0.13961217543434561353e0 + (0.26349215871051761416e-2 + (0.22371342712572567744e-4 + (0.16868008199296822247e-6 + (0.11216596910444996246e-8 + (0.65015264753090890662e-11 + 0.32210394506666666666e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 51: {
|
||||
double t = 2*y100 - 103;
|
||||
return 0.14497287157673800690e0 + (0.27264675383982439814e-2 + (0.23410870961050950197e-4 + (0.17791863939526376477e-6 + (0.11886425714330958106e-8 + (0.68993039665054288034e-11 + 0.34101266222222222221e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 52: {
|
||||
double t = 2*y100 - 105;
|
||||
return 0.15052089272774618151e0 + (0.28222846410136238008e-2 + (0.24507470422713397006e-4 + (0.18770927679626136909e-6 + (0.12597184587583370712e-8 + (0.73203433049229821618e-11 + 0.36087889048888888890e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 53: {
|
||||
double t = 2*y100 - 107;
|
||||
return 0.15626501395774612325e0 + (0.29226079376196624949e-2 + (0.25664553693768450545e-4 + (0.19808568415654461964e-6 + (0.13351257759815557897e-8 + (0.77658124891046760667e-11 + 0.38173420035555555555e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 54: {
|
||||
double t = 2*y100 - 109;
|
||||
return 0.16221449434620737567e0 + (0.30276865332726475672e-2 + (0.26885741326534564336e-4 + (0.20908350604346384143e-6 + (0.14151148144240728728e-8 + (0.82369170665974313027e-11 + 0.40360957457777777779e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 55: {
|
||||
double t = 2*y100 - 111;
|
||||
return 0.16837910595412130659e0 + (0.31377844510793082301e-2 + (0.28174873844911175026e-4 + (0.22074043807045782387e-6 + (0.14999481055996090039e-8 + (0.87348993661930809254e-11 + 0.42653528977777777779e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 56: {
|
||||
double t = 2*y100 - 113;
|
||||
return 0.17476916455659369953e0 + (0.32531815370903068316e-2 + (0.29536024347344364074e-4 + (0.23309632627767074202e-6 + (0.15899007843582444846e-8 + (0.92610375235427359475e-11 + 0.45054073102222222221e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 57: {
|
||||
double t = 2*y100 - 115;
|
||||
return 0.18139556223643701364e0 + (0.33741744168096996041e-2 + (0.30973511714709500836e-4 + (0.24619326937592290996e-6 + (0.16852609412267750744e-8 + (0.98166442942854895573e-11 + 0.47565418097777777779e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 58: {
|
||||
double t = 2*y100 - 117;
|
||||
return 0.18826980194443664549e0 + (0.35010775057740317997e-2 + (0.32491914440014267480e-4 + (0.26007572375886319028e-6 + (0.17863299617388376116e-8 + (0.10403065638343878679e-10 + 0.50190265831111111110e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 59: {
|
||||
double t = 2*y100 - 119;
|
||||
return 0.19540403413693967350e0 + (0.36342240767211326315e-2 + (0.34096085096200907289e-4 + (0.27479061117017637474e-6 + (0.18934228504790032826e-8 + (0.11021679075323598664e-10 + 0.52931171733333333334e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 60: {
|
||||
double t = 2*y100 - 121;
|
||||
return 0.20281109560651886959e0 + (0.37739673859323597060e-2 + (0.35791165457592409054e-4 + (0.29038742889416172404e-6 + (0.20068685374849001770e-8 + (0.11673891799578381999e-10 + 0.55790523093333333334e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 61: {
|
||||
double t = 2*y100 - 123;
|
||||
return 0.21050455062669334978e0 + (0.39206818613925652425e-2 + (0.37582602289680101704e-4 + (0.30691836231886877385e-6 + (0.21270101645763677824e-8 + (0.12361138551062899455e-10 + 0.58770520160000000000e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 62: {
|
||||
double t = 2*y100 - 125;
|
||||
return 0.21849873453703332479e0 + (0.40747643554689586041e-2 + (0.39476163820986711501e-4 + (0.32443839970139918836e-6 + (0.22542053491518680200e-8 + (0.13084879235290858490e-10 + 0.61873153262222222221e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 63: {
|
||||
double t = 2*y100 - 127;
|
||||
return 0.22680879990043229327e0 + (0.42366354648628516935e-2 + (0.41477956909656896779e-4 + (0.34300544894502810002e-6 + (0.23888264229264067658e-8 + (0.13846596292818514601e-10 + 0.65100183751111111110e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 64: {
|
||||
double t = 2*y100 - 129;
|
||||
return 0.23545076536988703937e0 + (0.44067409206365170888e-2 + (0.43594444916224700881e-4 + (0.36268045617760415178e-6 + (0.25312606430853202748e-8 + (0.14647791812837903061e-10 + 0.68453122631111111110e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 65: {
|
||||
double t = 2*y100 - 131;
|
||||
return 0.24444156740777432838e0 + (0.45855530511605787178e-2 + (0.45832466292683085475e-4 + (0.38352752590033030472e-6 + (0.26819103733055603460e-8 + (0.15489984390884756993e-10 + 0.71933206364444444445e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 66: {
|
||||
double t = 2*y100 - 133;
|
||||
return 0.25379911500634264643e0 + (0.47735723208650032167e-2 + (0.48199253896534185372e-4 + (0.40561404245564732314e-6 + (0.28411932320871165585e-8 + (0.16374705736458320149e-10 + 0.75541379822222222221e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 67: {
|
||||
double t = 2*y100 - 135;
|
||||
return 0.26354234756393613032e0 + (0.49713289477083781266e-2 + (0.50702455036930367504e-4 + (0.42901079254268185722e-6 + (0.30095422058900481753e-8 + (0.17303497025347342498e-10 + 0.79278273368888888890e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 68: {
|
||||
double t = 2*y100 - 137;
|
||||
return 0.27369129607732343398e0 + (0.51793846023052643767e-2 + (0.53350152258326602629e-4 + (0.45379208848865015485e-6 + (0.31874057245814381257e-8 + (0.18277905010245111046e-10 + 0.83144182364444444445e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 69: {
|
||||
double t = 2*y100 - 139;
|
||||
return 0.28426714781640316172e0 + (0.53983341916695141966e-2 + (0.56150884865255810638e-4 + (0.48003589196494734238e-6 + (0.33752476967570796349e-8 + (0.19299477888083469086e-10 + 0.87139049137777777779e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 70: {
|
||||
double t = 2*y100 - 141;
|
||||
return 0.29529231465348519920e0 + (0.56288077305420795663e-2 + (0.59113671189913307427e-4 + (0.50782393781744840482e-6 + (0.35735475025851713168e-8 + (0.20369760937017070382e-10 + 0.91262442613333333334e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 71: {
|
||||
double t = 2*y100 - 143;
|
||||
return 0.30679050522528838613e0 + (0.58714723032745403331e-2 + (0.62248031602197686791e-4 + (0.53724185766200945789e-6 + (0.37827999418960232678e-8 + (0.21490291930444538307e-10 + 0.95513539182222222221e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 72: {
|
||||
double t = 2*y100 - 145;
|
||||
return 0.31878680111173319425e0 + (0.61270341192339103514e-2 + (0.65564012259707640976e-4 + (0.56837930287837738996e-6 + (0.40035151353392378882e-8 + (0.22662596341239294792e-10 + 0.99891109760000000000e-13 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 73: {
|
||||
double t = 2*y100 - 147;
|
||||
return 0.33130773722152622027e0 + (0.63962406646798080903e-2 + (0.69072209592942396666e-4 + (0.60133006661885941812e-6 + (0.42362183765883466691e-8 + (0.23888182347073698382e-10 + 0.10439349811555555556e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 74: {
|
||||
double t = 2*y100 - 149;
|
||||
return 0.34438138658041336523e0 + (0.66798829540414007258e-2 + (0.72783795518603561144e-4 + (0.63619220443228800680e-6 + (0.44814499336514453364e-8 + (0.25168535651285475274e-10 + 0.10901861383111111111e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 75: {
|
||||
double t = 2*y100 - 151;
|
||||
return 0.35803744972380175583e0 + (0.69787978834882685031e-2 + (0.76710543371454822497e-4 + (0.67306815308917386747e-6 + (0.47397647975845228205e-8 + (0.26505114141143050509e-10 + 0.11376390933333333333e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 76: {
|
||||
double t = 2*y100 - 153;
|
||||
return 0.37230734890119724188e0 + (0.72938706896461381003e-2 + (0.80864854542670714092e-4 + (0.71206484718062688779e-6 + (0.50117323769745883805e-8 + (0.27899342394100074165e-10 + 0.11862637614222222222e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 77: {
|
||||
double t = 2*y100 - 155;
|
||||
return 0.38722432730555448223e0 + (0.76260375162549802745e-2 + (0.85259785810004603848e-4 + (0.75329383305171327677e-6 + (0.52979361368388119355e-8 + (0.29352606054164086709e-10 + 0.12360253370666666667e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 78: {
|
||||
double t = 2*y100 - 157;
|
||||
return 0.40282355354616940667e0 + (0.79762880915029728079e-2 + (0.89909077342438246452e-4 + (0.79687137961956194579e-6 + (0.55989731807360403195e-8 + (0.30866246101464869050e-10 + 0.12868841946666666667e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 79: {
|
||||
double t = 2*y100 - 159;
|
||||
return 0.41914223158913787649e0 + (0.83456685186950463538e-2 + (0.94827181359250161335e-4 + (0.84291858561783141014e-6 + (0.59154537751083485684e-8 + (0.32441553034347469291e-10 + 0.13387957943111111111e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 80: {
|
||||
double t = 2*y100 - 161;
|
||||
return 0.43621971639463786896e0 + (0.87352841828289495773e-2 + (0.10002929142066799966e-3 + (0.89156148280219880024e-6 + (0.62480008150788597147e-8 + (0.34079760983458878910e-10 + 0.13917107176888888889e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 81: {
|
||||
double t = 2*y100 - 163;
|
||||
return 0.45409763548534330981e0 + (0.91463027755548240654e-2 + (0.10553137232446167258e-3 + (0.94293113464638623798e-6 + (0.65972492312219959885e-8 + (0.35782041795476563662e-10 + 0.14455745872000000000e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 82: {
|
||||
double t = 2*y100 - 165;
|
||||
return 0.47282001668512331468e0 + (0.95799574408860463394e-2 + (0.11135019058000067469e-3 + (0.99716373005509038080e-6 + (0.69638453369956970347e-8 + (0.37549499088161345850e-10 + 0.15003280712888888889e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 83: {
|
||||
double t = 2*y100 - 167;
|
||||
return 0.49243342227179841649e0 + (0.10037550043909497071e-1 + (0.11750334542845234952e-3 + (0.10544006716188967172e-5 + (0.73484461168242224872e-8 + (0.39383162326435752965e-10 + 0.15559069118222222222e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 84: {
|
||||
double t = 2*y100 - 169;
|
||||
return 0.51298708979209258326e0 + (0.10520454564612427224e-1 + (0.12400930037494996655e-3 + (0.11147886579371265246e-5 + (0.77517184550568711454e-8 + (0.41283980931872622611e-10 + 0.16122419680000000000e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 85: {
|
||||
double t = 2*y100 - 171;
|
||||
return 0.53453307979101369843e0 + (0.11030120618800726938e-1 + (0.13088741519572269581e-3 + (0.11784797595374515432e-5 + (0.81743383063044825400e-8 + (0.43252818449517081051e-10 + 0.16692592640000000000e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 86: {
|
||||
double t = 2*y100 - 173;
|
||||
return 0.55712643071169299478e0 + (0.11568077107929735233e-1 + (0.13815797838036651289e-3 + (0.12456314879260904558e-5 + (0.86169898078969313597e-8 + (0.45290446811539652525e-10 + 0.17268801084444444444e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 87: {
|
||||
double t = 2*y100 - 175;
|
||||
return 0.58082532122519320968e0 + (0.12135935999503877077e-1 + (0.14584223996665838559e-3 + (0.13164068573095710742e-5 + (0.90803643355106020163e-8 + (0.47397540713124619155e-10 + 0.17850211608888888889e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 88: {
|
||||
double t = 2*y100 - 177;
|
||||
return 0.60569124025293375554e0 + (0.12735396239525550361e-1 + (0.15396244472258863344e-3 + (0.13909744385382818253e-5 + (0.95651595032306228245e-8 + (0.49574672127669041550e-10 + 0.18435945564444444444e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 89: {
|
||||
double t = 2*y100 - 179;
|
||||
return 0.63178916494715716894e0 + (0.13368247798287030927e-1 + (0.16254186562762076141e-3 + (0.14695084048334056083e-5 + (0.10072078109604152350e-7 + (0.51822304995680707483e-10 + 0.19025081422222222222e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 90: {
|
||||
double t = 2*y100 - 181;
|
||||
return 0.65918774689725319200e0 + (0.14036375850601992063e-1 + (0.17160483760259706354e-3 + (0.15521885688723188371e-5 + (0.10601827031535280590e-7 + (0.54140790105837520499e-10 + 0.19616655146666666667e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 91: {
|
||||
double t = 2*y100 - 183;
|
||||
return 0.68795950683174433822e0 + (0.14741765091365869084e-1 + (0.18117679143520433835e-3 + (0.16392004108230585213e-5 + (0.11155116068018043001e-7 + (0.56530360194925690374e-10 + 0.20209663662222222222e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 92: {
|
||||
double t = 2*y100 - 185;
|
||||
return 0.71818103808729967036e0 + (0.15486504187117112279e-1 + (0.19128428784550923217e-3 + (0.17307350969359975848e-5 + (0.11732656736113607751e-7 + (0.58991125287563833603e-10 + 0.20803065333333333333e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 93: {
|
||||
double t = 2*y100 - 187;
|
||||
return 0.74993321911726254661e0 + (0.16272790364044783382e-1 + (0.20195505163377912645e-3 + (0.18269894883203346953e-5 + (0.12335161021630225535e-7 + (0.61523068312169087227e-10 + 0.21395783431111111111e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 94: {
|
||||
double t = 2*y100 - 189;
|
||||
return 0.78330143531283492729e0 + (0.17102934132652429240e-1 + (0.21321800585063327041e-3 + (0.19281661395543913713e-5 + (0.12963340087354341574e-7 + (0.64126040998066348872e-10 + 0.21986708942222222222e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 95: {
|
||||
double t = 2*y100 - 191;
|
||||
return 0.81837581041023811832e0 + (0.17979364149044223802e-1 + (0.22510330592753129006e-3 + (0.20344732868018175389e-5 + (0.13617902941839949718e-7 + (0.66799760083972474642e-10 + 0.22574701262222222222e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 96: {
|
||||
double t = 2*y100 - 193;
|
||||
return 0.85525144775685126237e0 + (0.18904632212547561026e-1 + (0.23764237370371255638e-3 + (0.21461248251306387979e-5 + (0.14299555071870523786e-7 + (0.69543803864694171934e-10 + 0.23158593688888888889e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 97: {
|
||||
double t = 2*y100 - 195;
|
||||
return 0.89402868170849933734e0 + (0.19881418399127202569e-1 + (0.25086793128395995798e-3 + (0.22633402747585233180e-5 + (0.15008997042116532283e-7 + (0.72357609075043941261e-10 + 0.23737194737777777778e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 98: {
|
||||
double t = 2*y100 - 197;
|
||||
return 0.93481333942870796363e0 + (0.20912536329780368893e-1 + (0.26481403465998477969e-3 + (0.23863447359754921676e-5 + (0.15746923065472184451e-7 + (0.75240468141720143653e-10 + 0.24309291271111111111e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
case 99: {
|
||||
double t = 2*y100 - 199;
|
||||
return 0.97771701335885035464e0 + (0.22000938572830479551e-1 + (0.27951610702682383001e-3 + (0.25153688325245314530e-5 + (0.16514019547822821453e-7 + (0.78191526829368231251e-10 + 0.24873652355555555556e-12 * t) * t) * t) * t) * t) * t;
|
||||
}
|
||||
}
|
||||
/* we only get here if y = 1, i.e. |x| < 4*eps, in which case
|
||||
* erfcx is within 1e-15 of 1.. */
|
||||
return 1.0;
|
||||
} /* erfcx_y100 */
|
||||
|
||||
/* optimizer friendly implementation of exp2(x).
|
||||
*
|
||||
* strategy:
|
||||
*
|
||||
* split argument into an integer part and a fraction:
|
||||
* ipart = floor(x+0.5);
|
||||
* fpart = x - ipart;
|
||||
*
|
||||
* compute exp2(ipart) from setting the ieee754 exponent
|
||||
* compute exp2(fpart) using a pade' approximation for x in [-0.5;0.5[
|
||||
*
|
||||
* the result becomes: exp2(x) = exp2(ipart) * exp2(fpart)
|
||||
*/
|
||||
|
||||
/* IEEE 754 double precision floating point data manipulation */
|
||||
typedef union
|
||||
{
|
||||
double f;
|
||||
uint64_t u;
|
||||
struct {int32_t i0,i1;} s;
|
||||
} udi_t;
|
||||
|
||||
static const double fm_exp2_q[] = {
|
||||
/* 1.00000000000000000000e0, */
|
||||
2.33184211722314911771e2,
|
||||
4.36821166879210612817e3
|
||||
};
|
||||
static const double fm_exp2_p[] = {
|
||||
2.30933477057345225087e-2,
|
||||
2.02020656693165307700e1,
|
||||
1.51390680115615096133e3
|
||||
};
|
||||
|
||||
double MathSpecialKokkos::exp2_x86(double x)
|
||||
{
|
||||
double ipart, fpart, px, qx;
|
||||
udi_t epart;
|
||||
|
||||
ipart = floor(x+0.5);
|
||||
fpart = x - ipart;
|
||||
epart.s.i0 = 0;
|
||||
epart.s.i1 = (((int) ipart) + 1023) << 20;
|
||||
|
||||
x = fpart*fpart;
|
||||
|
||||
px = fm_exp2_p[0];
|
||||
px = px*x + fm_exp2_p[1];
|
||||
qx = x + fm_exp2_q[0];
|
||||
px = px*x + fm_exp2_p[2];
|
||||
qx = qx*x + fm_exp2_q[1];
|
||||
|
||||
px = px * fpart;
|
||||
|
||||
x = 1.0 + 2.0*(px/(qx-px));
|
||||
return epart.f*x;
|
||||
}
|
|
@ -0,0 +1,100 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifndef LMP_MATH_SPECIAL_KOKKOS_H
|
||||
#define LMP_MATH_SPECIAL_KOKKOS_H
|
||||
|
||||
#include <math.h>
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
namespace MathSpecialKokkos {
|
||||
|
||||
// support function for scaled error function complement
|
||||
|
||||
extern double erfcx_y100(const double y100);
|
||||
|
||||
// fast 2**x function without argument checks for little endian CPUs
|
||||
extern double exp2_x86(double x);
|
||||
|
||||
// scaled error function complement exp(x*x)*erfc(x) for coul/long styles
|
||||
|
||||
static inline double my_erfcx(const double x)
|
||||
{
|
||||
if (x >= 0.0) return erfcx_y100(400.0/(4.0+x));
|
||||
else return 2.0*exp(x*x) - erfcx_y100(400.0/(4.0-x));
|
||||
}
|
||||
|
||||
// exp(-x*x) for coul/long styles
|
||||
|
||||
static inline double expmsq(double x)
|
||||
{
|
||||
x *= x;
|
||||
x *= 1.4426950408889634074; // log_2(e)
|
||||
#if defined(__BYTE_ORDER__)
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
return (x < 1023.0) ? exp2_x86(-x) : 0.0;
|
||||
#endif
|
||||
#endif
|
||||
return (x < 1023.0) ? exp2(-x) : 0.0;
|
||||
}
|
||||
|
||||
// x**2, use instead of pow(x,2.0)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double square(const double &x) { return x*x; }
|
||||
|
||||
// x**3, use instead of pow(x,3.0)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double cube(const double &x) { return x*x*x; }
|
||||
|
||||
// return -1.0 for odd n, 1.0 for even n, like pow(-1.0,n)
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double powsign(const int n) { return (n & 1) ? -1.0 : 1.0; }
|
||||
|
||||
// optimized version of pow(x,n) with n being integer
|
||||
// up to 10x faster than pow(x,y)
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double powint(const double &x, const int n) {
|
||||
double yy,ww;
|
||||
|
||||
if (x == 0.0) return 0.0;
|
||||
int nn = (n > 0) ? n : -n;
|
||||
ww = x;
|
||||
|
||||
for (yy = 1.0; nn != 0; nn >>= 1, ww *=ww)
|
||||
if (nn & 1) yy *= ww;
|
||||
|
||||
return (n > 0) ? yy : 1.0/yy;
|
||||
}
|
||||
|
||||
// optimized version of (sin(x)/x)**n with n being a _positive_ integer
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static double powsinxx(const double &x, int n) {
|
||||
double yy,ww;
|
||||
|
||||
if (x == 0.0) return 1.0;
|
||||
|
||||
ww = sin(x)/x;
|
||||
|
||||
for (yy = 1.0; n != 0; n >>= 1, ww *=ww)
|
||||
if (n & 1) yy *= ww;
|
||||
|
||||
return yy;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
|
@ -115,6 +115,19 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
// reallocate per-atom arrays if necessary
|
||||
|
||||
if (eflag_atom) {
|
||||
memory->destroy_kokkos(k_eatom,eatom);
|
||||
memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
|
||||
d_eatom = k_eatom.view<DeviceType>();
|
||||
}
|
||||
if (vflag_atom) {
|
||||
memory->destroy_kokkos(k_vatom,vatom);
|
||||
memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
|
||||
d_vatom = k_vatom.view<DeviceType>();
|
||||
}
|
||||
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
k_cutsq.template sync<DeviceType>();
|
||||
k_cut_ljsq.template sync<DeviceType>();
|
||||
|
@ -169,6 +182,16 @@ void PairLJCharmmCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
|
||||
if (vflag_fdotr) pair_virial_fdotr_compute(this);
|
||||
|
||||
if (eflag_atom) {
|
||||
k_eatom.template modify<DeviceType>();
|
||||
k_eatom.template sync<LMPHostType>();
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
k_vatom.template modify<DeviceType>();
|
||||
k_vatom.template sync<LMPHostType>();
|
||||
}
|
||||
|
||||
copymode = 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -68,6 +68,10 @@ PairLJCutCoulLongKokkos<DeviceType>::PairLJCutCoulLongKokkos(LAMMPS *lmp):PairLJ
|
|||
template<class DeviceType>
|
||||
PairLJCutCoulLongKokkos<DeviceType>::~PairLJCutCoulLongKokkos()
|
||||
{
|
||||
memory->destroy_kokkos(k_eatom,eatom);
|
||||
memory->destroy_kokkos(k_vatom,vatom);
|
||||
eatom = NULL;
|
||||
vatom = NULL;
|
||||
if (allocated){
|
||||
memory->destroy_kokkos(k_cutsq, cutsq);
|
||||
memory->destroy_kokkos(k_cut_ljsq, cut_ljsq);
|
||||
|
@ -100,6 +104,20 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = vflag_fdotr = 0;
|
||||
|
||||
|
||||
// reallocate per-atom arrays if necessary
|
||||
|
||||
if (eflag_atom) {
|
||||
memory->destroy_kokkos(k_eatom,eatom);
|
||||
memory->create_kokkos(k_eatom,eatom,maxeatom,"pair:eatom");
|
||||
d_eatom = k_eatom.view<DeviceType>();
|
||||
}
|
||||
if (vflag_atom) {
|
||||
memory->destroy_kokkos(k_vatom,vatom);
|
||||
memory->create_kokkos(k_vatom,vatom,maxvatom,6,"pair:vatom");
|
||||
d_vatom = k_vatom.view<DeviceType>();
|
||||
}
|
||||
|
||||
atomKK->sync(execution_space,datamask_read);
|
||||
k_cutsq.template sync<DeviceType>();
|
||||
k_cut_ljsq.template sync<DeviceType>();
|
||||
|
@ -151,6 +169,16 @@ void PairLJCutCoulLongKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
}
|
||||
|
||||
if (vflag_fdotr) pair_virial_fdotr_compute(this);
|
||||
|
||||
if (eflag_atom) {
|
||||
k_eatom.template modify<DeviceType>();
|
||||
k_eatom.template sync<LMPHostType>();
|
||||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
k_vatom.template modify<DeviceType>();
|
||||
k_vatom.template sync<LMPHostType>();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
|
|
@ -87,6 +87,9 @@ class PairLJCutCoulLongKokkos : public PairLJCutCoulLong {
|
|||
typename ArrayTypes<DeviceType>::t_f_array f;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_randomread type;
|
||||
typename ArrayTypes<DeviceType>::t_float_1d_randomread q;
|
||||
|
||||
DAT::tdual_efloat_1d k_eatom;
|
||||
DAT::tdual_virial_array k_vatom;
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
|
||||
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
|
||||
|
||||
|
|
|
@ -660,6 +660,8 @@ void PairReaxCKokkos<DeviceType>::LR_vdW_Coulomb( int i, int j, double r_ij, LR_
|
|||
template<class DeviceType>
|
||||
void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
{
|
||||
copymode = 1;
|
||||
|
||||
bocnt = hbcnt = 0;
|
||||
|
||||
eflag = eflag_in;
|
||||
|
@ -703,8 +705,6 @@ void PairReaxCKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
|||
pvector[i] = 0.0;
|
||||
}
|
||||
|
||||
copymode = 1;
|
||||
|
||||
EV_FLOAT_REAX ev;
|
||||
EV_FLOAT_REAX ev_all;
|
||||
|
||||
|
@ -1363,6 +1363,23 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxZero, const int &n) const {
|
|||
d_dDeltap_self(n,j) = 0.0;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairReaxCKokkos<DeviceType>::operator()(PairReaxZeroEAtom, const int &i) const {
|
||||
v_eatom(i) = 0.0;
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void PairReaxCKokkos<DeviceType>::operator()(PairReaxZeroVAtom, const int &i) const {
|
||||
v_vatom(i,0) = 0.0;
|
||||
v_vatom(i,1) = 0.0;
|
||||
v_vatom(i,2) = 0.0;
|
||||
v_vatom(i,3) = 0.0;
|
||||
v_vatom(i,4) = 0.0;
|
||||
v_vatom(i,5) = 0.0;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
|
@ -2398,7 +2415,7 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeAngular<NEIGHFLAG,EV
|
|||
F_FLOAT BOA_ij, BOA_ik, rij, bo_ij, bo_ik;
|
||||
F_FLOAT dcos_theta_di[3], dcos_theta_dj[3], dcos_theta_dk[3];
|
||||
F_FLOAT eng_tmp, fi_tmp[3], fj_tmp[3], fk_tmp[3];
|
||||
F_FLOAT delij[3], delik[3];
|
||||
F_FLOAT delij[3], delik[3], delji[3], delki[3];
|
||||
|
||||
p_val6 = gp[14];
|
||||
p_val8 = gp[33];
|
||||
|
@ -2648,8 +2665,10 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeAngular<NEIGHFLAG,EV
|
|||
if (EVFLAG) {
|
||||
eng_tmp = e_ang + e_pen + e_coa;
|
||||
//if (eflag_atom) this->template ev_tally<NEIGHFLAG>(ev,i,j,eng_tmp,0.0,0.0,0.0,0.0);
|
||||
for (int d = 0; d < 3; d++) delki[d] = -1.0 * delik[d];
|
||||
for (int d = 0; d < 3; d++) delji[d] = -1.0 * delij[d];
|
||||
if (eflag_atom) this->template e_tally<NEIGHFLAG>(ev,i,j,eng_tmp);
|
||||
if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj_tmp,fk_tmp,delij,delik);
|
||||
if (vflag_either) this->template v_tally3<NEIGHFLAG>(ev,i,j,k,fj_tmp,fk_tmp,delji,delki);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -2884,31 +2903,34 @@ void PairReaxCKokkos<DeviceType>::operator()(PairReaxComputeTorsion<NEIGHFLAG,EV
|
|||
if( arg > 1.0 ) arg = 1.0;
|
||||
if( arg < -1.0 ) arg = -1.0;
|
||||
|
||||
if( sin_ijk >= 0 && sin_ijk <= 1e-10 ) sin_ijk = 1e-10;
|
||||
else if( sin_ijk <= 0 && sin_ijk >= -1e-10 ) sin_ijk = -1e-10;
|
||||
if( sin_jil >= 0 && sin_jil <= 1e-10 ) sin_jil = 1e-10;
|
||||
else if( sin_jil <= 0 && sin_jil >= -1e-10 ) sin_jil = -1e-10;
|
||||
F_FLOAT sin_ijk_rnd = sin_ijk;
|
||||
F_FLOAT sin_jil_rnd = sin_jil;
|
||||
|
||||
if( sin_ijk >= 0 && sin_ijk <= 1e-10 ) sin_ijk_rnd = 1e-10;
|
||||
else if( sin_ijk <= 0 && sin_ijk >= -1e-10 ) sin_ijk_rnd = -1e-10;
|
||||
if( sin_jil >= 0 && sin_jil <= 1e-10 ) sin_jil_rnd = 1e-10;
|
||||
else if( sin_jil <= 0 && sin_jil >= -1e-10 ) sin_jil_rnd = -1e-10;
|
||||
|
||||
// dcos_omega_di
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dk[d] = ((htra-arg*hnra)/rik) * delik[d] - dellk[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dk[d] += (hthd-arg*hnhd)/sin_ijk * -dcos_ijk_dk[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dk[d] += (hthd-arg*hnhd)/sin_ijk_rnd * -dcos_ijk_dk[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dk[d] *= 2.0/poem;
|
||||
|
||||
// dcos_omega_dj
|
||||
for (int d = 0; d < 3; d++) dcos_omega_di[d] = -((htra-arg*hnra)/rik) * delik[d] - htrb/rij * delij[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_di[d] += -(hthd-arg*hnhd)/sin_ijk * dcos_ijk_di[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_di[d] += -(hthe-arg*hnhe)/sin_jil * dcos_jil_di[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_di[d] += -(hthd-arg*hnhd)/sin_ijk_rnd * dcos_ijk_di[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_di[d] += -(hthe-arg*hnhe)/sin_jil_rnd * dcos_jil_di[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_di[d] *= 2.0/poem;
|
||||
|
||||
// dcos_omega_dk
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dj[d] = -((htrc-arg*hnrc)/rjl) * deljl[d] + htrb/rij * delij[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dj[d] += -(hthd-arg*hnhd)/sin_ijk * dcos_ijk_dj[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dj[d] += -(hthe-arg*hnhe)/sin_jil * dcos_jil_dj[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dj[d] += -(hthd-arg*hnhd)/sin_ijk_rnd * dcos_ijk_dj[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dj[d] += -(hthe-arg*hnhe)/sin_jil_rnd * dcos_jil_dj[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dj[d] *= 2.0/poem;
|
||||
|
||||
// dcos_omega_dl
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dl[d] = ((htrc-arg*hnrc)/rjl) * deljl[d] + dellk[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dl[d] += (hthe-arg*hnhe)/sin_jil * -dcos_jil_dk[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dl[d] += (hthe-arg*hnhe)/sin_jil_rnd * -dcos_jil_dk[d];
|
||||
for (int d = 0; d < 3; d++) dcos_omega_dl[d] *= 2.0/poem;
|
||||
|
||||
cos_omega = cos( omega );
|
||||
|
@ -3728,12 +3750,12 @@ void PairReaxCKokkos<DeviceType>::v_tally3(EV_FLOAT_REAX &ev, const int &i, cons
|
|||
|
||||
F_FLOAT v[6];
|
||||
|
||||
v[0] = (drij[0]*fj[0] + drik[0]*fk[0]);
|
||||
v[1] = (drij[1]*fj[1] + drik[1]*fk[1]);
|
||||
v[2] = (drij[2]*fj[2] + drik[2]*fk[2]);
|
||||
v[3] = (drij[0]*fj[1] + drik[0]*fk[1]);
|
||||
v[4] = (drij[0]*fj[2] + drik[0]*fk[2]);
|
||||
v[5] = (drij[1]*fj[2] + drik[1]*fk[2]);
|
||||
v[0] = drij[0]*fj[0] + drik[0]*fk[0];
|
||||
v[1] = drij[1]*fj[1] + drik[1]*fk[1];
|
||||
v[2] = drij[2]*fj[2] + drik[2]*fk[2];
|
||||
v[3] = drij[0]*fj[1] + drik[0]*fk[1];
|
||||
v[4] = drij[0]*fj[2] + drik[0]*fk[2];
|
||||
v[5] = drij[1]*fj[2] + drik[1]*fk[2];
|
||||
|
||||
if (vflag_global) {
|
||||
ev.v[0] += v[0];
|
||||
|
@ -3745,12 +3767,12 @@ void PairReaxCKokkos<DeviceType>::v_tally3(EV_FLOAT_REAX &ev, const int &i, cons
|
|||
}
|
||||
|
||||
if (vflag_atom) {
|
||||
a_vatom(i,0) += THIRD*v[0]; a_vatom(i,1) += THIRD*v[1]; a_vatom(i,2) += THIRD*v[2];
|
||||
a_vatom(i,3) += THIRD*v[3]; a_vatom(i,4) += THIRD*v[4]; a_vatom(i,5) += THIRD*v[5];
|
||||
a_vatom(j,0) += THIRD*v[0]; a_vatom(j,1) += THIRD*v[1]; a_vatom(j,2) += THIRD*v[2];
|
||||
a_vatom(j,3) += THIRD*v[3]; a_vatom(j,4) += THIRD*v[4]; a_vatom(j,5) += THIRD*v[5];
|
||||
a_vatom(k,0) += THIRD*v[0]; a_vatom(k,1) += THIRD*v[1]; a_vatom(k,2) += THIRD*v[2];
|
||||
a_vatom(k,3) += THIRD*v[3]; a_vatom(k,4) += THIRD*v[4]; a_vatom(k,5) += THIRD*v[5];
|
||||
a_vatom(i,0) += THIRD * v[0]; a_vatom(i,1) += THIRD * v[1]; a_vatom(i,2) += THIRD * v[2];
|
||||
a_vatom(i,3) += THIRD * v[3]; a_vatom(i,4) += THIRD * v[4]; a_vatom(i,5) += THIRD * v[5];
|
||||
a_vatom(j,0) += THIRD * v[0]; a_vatom(j,1) += THIRD * v[1]; a_vatom(j,2) += THIRD * v[2];
|
||||
a_vatom(j,3) += THIRD * v[3]; a_vatom(j,4) += THIRD * v[4]; a_vatom(j,5) += THIRD * v[5];
|
||||
a_vatom(k,0) += THIRD * v[0]; a_vatom(k,1) += THIRD * v[1]; a_vatom(k,2) += THIRD * v[2];
|
||||
a_vatom(k,3) += THIRD * v[3]; a_vatom(k,4) += THIRD * v[4]; a_vatom(k,5) += THIRD * v[5];
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -3767,12 +3789,12 @@ void PairReaxCKokkos<DeviceType>::v_tally4(EV_FLOAT_REAX &ev, const int &i, cons
|
|||
// The vatom array is atomic for Half/Thread neighbor style
|
||||
F_FLOAT v[6];
|
||||
|
||||
v[0] = 0.25 * (dril[0]*fi[0] + drjl[0]*fj[0] + drkl[0]*fk[0]);
|
||||
v[1] = 0.25 * (dril[1]*fi[1] + drjl[1]*fj[1] + drkl[1]*fk[1]);
|
||||
v[2] = 0.25 * (dril[2]*fi[2] + drjl[2]*fj[2] + drkl[2]*fk[2]);
|
||||
v[3] = 0.25 * (dril[0]*fi[1] + drjl[0]*fj[1] + drkl[0]*fk[1]);
|
||||
v[4] = 0.25 * (dril[0]*fi[2] + drjl[0]*fj[2] + drkl[0]*fk[2]);
|
||||
v[5] = 0.25 * (dril[1]*fi[2] + drjl[1]*fj[2] + drkl[1]*fk[2]);
|
||||
v[0] = dril[0]*fi[0] + drjl[0]*fj[0] + drkl[0]*fk[0];
|
||||
v[1] = dril[1]*fi[1] + drjl[1]*fj[1] + drkl[1]*fk[1];
|
||||
v[2] = dril[2]*fi[2] + drjl[2]*fj[2] + drkl[2]*fk[2];
|
||||
v[3] = dril[0]*fi[1] + drjl[0]*fj[1] + drkl[0]*fk[1];
|
||||
v[4] = dril[0]*fi[2] + drjl[0]*fj[2] + drkl[0]*fk[2];
|
||||
v[5] = dril[1]*fi[2] + drjl[1]*fj[2] + drkl[1]*fk[2];
|
||||
|
||||
if (vflag_global) {
|
||||
ev.v[0] += v[0];
|
||||
|
@ -3785,14 +3807,14 @@ void PairReaxCKokkos<DeviceType>::v_tally4(EV_FLOAT_REAX &ev, const int &i, cons
|
|||
|
||||
if (vflag_atom) {
|
||||
Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > a_vatom = v_vatom;
|
||||
a_vatom(i,0) += v[0]; a_vatom(i,1) += v[1]; a_vatom(i,2) += v[2];
|
||||
a_vatom(i,3) += v[3]; a_vatom(i,4) += v[4]; a_vatom(i,5) += v[5];
|
||||
a_vatom(j,0) += v[0]; a_vatom(j,1) += v[1]; a_vatom(j,2) += v[2];
|
||||
a_vatom(j,3) += v[3]; a_vatom(j,4) += v[4]; a_vatom(j,5) += v[5];
|
||||
a_vatom(k,0) += v[0]; a_vatom(k,1) += v[1]; a_vatom(k,2) += v[2];
|
||||
a_vatom(k,3) += v[3]; a_vatom(k,4) += v[4]; a_vatom(k,5) += v[5];
|
||||
a_vatom(l,0) += v[0]; a_vatom(l,1) += v[1]; a_vatom(l,2) += v[2];
|
||||
a_vatom(l,3) += v[3]; a_vatom(l,4) += v[4]; a_vatom(l,5) += v[5];
|
||||
a_vatom(i,0) += 0.25 * v[0]; a_vatom(i,1) += 0.25 * v[1]; a_vatom(i,2) += 0.25 * v[2];
|
||||
a_vatom(i,3) += 0.25 * v[3]; a_vatom(i,4) += 0.25 * v[4]; a_vatom(i,5) += 0.25 * v[5];
|
||||
a_vatom(j,0) += 0.25 * v[0]; a_vatom(j,1) += 0.25 * v[1]; a_vatom(j,2) += 0.25 * v[2];
|
||||
a_vatom(j,3) += 0.25 * v[3]; a_vatom(j,4) += 0.25 * v[4]; a_vatom(j,5) += 0.25 * v[5];
|
||||
a_vatom(k,0) += 0.25 * v[0]; a_vatom(k,1) += 0.25 * v[1]; a_vatom(k,2) += 0.25 * v[2];
|
||||
a_vatom(k,3) += 0.25 * v[3]; a_vatom(k,4) += 0.25 * v[4]; a_vatom(k,5) += 0.25 * v[5];
|
||||
a_vatom(l,0) += 0.25 * v[0]; a_vatom(l,1) += 0.25 * v[1]; a_vatom(l,2) += 0.25 * v[2];
|
||||
a_vatom(l,3) += 0.25 * v[3]; a_vatom(l,4) += 0.25 * v[4]; a_vatom(l,5) += 0.25 * v[5];
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -3894,6 +3916,14 @@ void PairReaxCKokkos<DeviceType>::ev_setup(int eflag, int vflag)
|
|||
|
||||
if (eflag_global) eng_vdwl = eng_coul = 0.0;
|
||||
if (vflag_global) for (i = 0; i < 6; i++) virial[i] = 0.0;
|
||||
if (eflag_atom) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroEAtom>(0,maxeatom),*this);
|
||||
DeviceType::fence();
|
||||
}
|
||||
if (vflag_atom) {
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, PairReaxZeroVAtom>(0,maxvatom),*this);
|
||||
DeviceType::fence();
|
||||
}
|
||||
|
||||
// if vflag_global = 2 and pair::compute() calls virial_fdotr_compute()
|
||||
// compute global virial via (F dot r) instead of via pairwise summation
|
||||
|
|
|
@ -84,6 +84,10 @@ struct PairReaxBuildListsHalf_LessAtomics{};
|
|||
|
||||
struct PairReaxZero{};
|
||||
|
||||
struct PairReaxZeroEAtom{};
|
||||
|
||||
struct PairReaxZeroVAtom{};
|
||||
|
||||
struct PairReaxBondOrder1{};
|
||||
|
||||
struct PairReaxBondOrder1_LessAtomics{};
|
||||
|
@ -172,6 +176,12 @@ class PairReaxCKokkos : public PairReaxC {
|
|||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(PairReaxZero, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(PairReaxZeroEAtom, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(PairReaxZeroVAtom, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(PairReaxBondOrder1, const int&) const;
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,552 @@
|
|||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef KSPACE_CLASS
|
||||
|
||||
KSpaceStyle(pppm/kk,PPPMKokkos<LMPDeviceType>)
|
||||
KSpaceStyle(pppm/kk/device,PPPMKokkos<LMPDeviceType>)
|
||||
KSpaceStyle(pppm/kk/host,PPPMKokkos<LMPHostType>)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_PPPM_KOKKOS_H
|
||||
#define LMP_PPPM_KOKKOS_H
|
||||
|
||||
#include "pppm.h"
|
||||
#include "gridcomm_kokkos.h"
|
||||
#include "kokkos_type.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
struct TagPPPM_setup1{};
|
||||
struct TagPPPM_setup2{};
|
||||
struct TagPPPM_setup3{};
|
||||
struct TagPPPM_setup4{};
|
||||
struct TagPPPM_compute_gf_ik{};
|
||||
struct TagPPPM_compute_gf_ik_triclinic{};
|
||||
struct TagPPPM_self1{};
|
||||
struct TagPPPM_self2{};
|
||||
struct TagPPPM_brick2fft{};
|
||||
struct TagPPPM_particle_map{};
|
||||
struct TagPPPM_make_rho_zero{};
|
||||
struct TagPPPM_make_rho_atomic{};
|
||||
struct TagPPPM_make_rho{};
|
||||
struct TagPPPM_poisson_ik1{};
|
||||
struct TagPPPM_poisson_ik2{};
|
||||
struct TagPPPM_poisson_ik3{};
|
||||
struct TagPPPM_poisson_ik4{};
|
||||
struct TagPPPM_poisson_ik5{};
|
||||
struct TagPPPM_poisson_ik6{};
|
||||
struct TagPPPM_poisson_ik7{};
|
||||
struct TagPPPM_poisson_ik8{};
|
||||
struct TagPPPM_poisson_ik9{};
|
||||
struct TagPPPM_poisson_ik10{};
|
||||
struct TagPPPM_poisson_ik_triclinic1{};
|
||||
struct TagPPPM_poisson_ik_triclinic2{};
|
||||
struct TagPPPM_poisson_ik_triclinic3{};
|
||||
struct TagPPPM_poisson_ik_triclinic4{};
|
||||
struct TagPPPM_poisson_ik_triclinic5{};
|
||||
struct TagPPPM_poisson_ik_triclinic6{};
|
||||
struct TagPPPM_poisson_peratom1{};
|
||||
struct TagPPPM_poisson_peratom2{};
|
||||
struct TagPPPM_poisson_peratom3{};
|
||||
struct TagPPPM_poisson_peratom4{};
|
||||
struct TagPPPM_poisson_peratom5{};
|
||||
struct TagPPPM_poisson_peratom6{};
|
||||
struct TagPPPM_poisson_peratom7{};
|
||||
struct TagPPPM_poisson_peratom8{};
|
||||
struct TagPPPM_poisson_peratom9{};
|
||||
struct TagPPPM_poisson_peratom10{};
|
||||
struct TagPPPM_poisson_peratom11{};
|
||||
struct TagPPPM_poisson_peratom12{};
|
||||
struct TagPPPM_poisson_peratom13{};
|
||||
struct TagPPPM_poisson_peratom14{};
|
||||
struct TagPPPM_fieldforce_ik{};
|
||||
struct TagPPPM_fieldforce_peratom{};
|
||||
struct TagPPPM_pack_forward1{};
|
||||
struct TagPPPM_pack_forward2{};
|
||||
struct TagPPPM_unpack_forward1{};
|
||||
struct TagPPPM_unpack_forward2{};
|
||||
struct TagPPPM_pack_reverse{};
|
||||
struct TagPPPM_unpack_reverse{};
|
||||
struct TagPPPM_slabcorr1{};
|
||||
struct TagPPPM_slabcorr2{};
|
||||
struct TagPPPM_slabcorr3{};
|
||||
struct TagPPPM_slabcorr4{};
|
||||
struct TagPPPM_timing_zero{};
|
||||
|
||||
template<class DeviceType>
|
||||
class PPPMKokkos : public PPPM {
|
||||
public:
|
||||
typedef DeviceType device_type;
|
||||
typedef ArrayTypes<DeviceType> AT;
|
||||
|
||||
PPPMKokkos(class LAMMPS *, int, char **);
|
||||
virtual ~PPPMKokkos();
|
||||
virtual void init();
|
||||
virtual void setup();
|
||||
void setup_grid();
|
||||
virtual void compute(int, int);
|
||||
virtual int timing_1d(int, double &);
|
||||
virtual int timing_3d(int, double &);
|
||||
virtual double memory_usage();
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_setup1, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_setup2, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_setup3, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_setup4, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_compute_gf_ik, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_compute_gf_ik_triclinic, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_self1, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_self2, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_brick2fft, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_particle_map, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_make_rho_zero, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_make_rho_atomic, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_make_rho, typename Kokkos::TeamPolicy<DeviceType, TagPPPM_make_rho>::member_type) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik1, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik2, const int&, EV_FLOAT &) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik3, const int&, EV_FLOAT &) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik4, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik5, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik6, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik7, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik8, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik9, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik10, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik_triclinic1, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik_triclinic2, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik_triclinic3, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik_triclinic4, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik_triclinic5, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_ik_triclinic6, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom1, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom2, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom3, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom4, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom5, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom6, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom7, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom8, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom9, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom10, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom11, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom12, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom13, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_poisson_peratom14, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_fieldforce_ik, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_fieldforce_peratom, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_pack_forward1, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_pack_forward2, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_unpack_forward1, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_unpack_forward2, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_pack_reverse, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_unpack_reverse, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_slabcorr1, const int&, double&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_slabcorr2, const int&, double&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_slabcorr3, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_slabcorr4, const int&) const;
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(TagPPPM_timing_zero, const int&) const;
|
||||
|
||||
//template<int NEIGHFLAG, int NEWTON_PAIR, int EVFLAG>
|
||||
//KOKKOS_INLINE_FUNCTION
|
||||
//void operator()(TagPPPMKernelA<NEIGHFLAG,NEWTON_PAIR,EVFLAG>, const int&) const;
|
||||
|
||||
protected:
|
||||
double unitkx,unitky,unitkz;
|
||||
double scaleinv,s2;
|
||||
double qscale,efact,ffact,dipole_all,dipole_r2,zprd;
|
||||
double xprd,yprd,zprd_slab;
|
||||
int nbx,nby,nbz,twoorder;
|
||||
int numx_fft,numy_fft,numz_fft;
|
||||
int numx_inout,numy_inout,numz_inout;
|
||||
int numx_out,numy_out,numz_out;
|
||||
int ix,iy,nlocal;
|
||||
|
||||
int nx,ny,nz;
|
||||
typename AT::t_int_1d_um d_list_index;
|
||||
typename AT::t_FFT_SCALAR_1d_um d_buf;
|
||||
|
||||
DAT::tdual_int_scalar k_flag;
|
||||
|
||||
typename AT::t_x_array_randomread x;
|
||||
typename AT::t_f_array f;
|
||||
typename AT::t_float_1d_randomread q;
|
||||
|
||||
DAT::tdual_efloat_1d k_eatom;
|
||||
DAT::tdual_virial_array k_vatom;
|
||||
typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
|
||||
typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
|
||||
|
||||
int factors[3];
|
||||
|
||||
typename AT::t_FFT_SCALAR_3d d_density_brick;
|
||||
typename AT::t_FFT_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick;
|
||||
typename AT::t_FFT_SCALAR_3d d_u_brick;
|
||||
typename AT::t_FFT_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick;
|
||||
typename AT::t_FFT_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick;
|
||||
typename AT::t_float_1d d_greensfn;
|
||||
typename AT::t_virial_array d_vg;
|
||||
typename AT::t_float_1d d_fkx;
|
||||
typename AT::t_float_1d d_fky;
|
||||
typename AT::t_float_1d d_fkz;
|
||||
DAT::tdual_FFT_SCALAR_1d k_density_fft;
|
||||
DAT::tdual_FFT_SCALAR_1d k_work1;
|
||||
DAT::tdual_FFT_SCALAR_1d k_work2;
|
||||
typename AT::t_FFT_SCALAR_1d d_density_fft;
|
||||
typename AT::t_FFT_SCALAR_1d d_work1;
|
||||
typename AT::t_FFT_SCALAR_1d d_work2;
|
||||
|
||||
DAT::tdual_float_1d k_gf_b;
|
||||
typename AT::t_float_1d d_gf_b;
|
||||
|
||||
//FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff;
|
||||
typename AT::t_FFT_SCALAR_2d_3 d_rho1d;
|
||||
DAT::tdual_FFT_SCALAR_2d k_rho_coeff;
|
||||
typename AT::t_FFT_SCALAR_2d d_rho_coeff;
|
||||
HAT::t_FFT_SCALAR_2d h_rho_coeff;
|
||||
//double **acons;
|
||||
typename Kokkos::DualView<F_FLOAT[8][7],Kokkos::LayoutRight,DeviceType>::t_host acons;
|
||||
|
||||
class FFT3d *fft1,*fft2;
|
||||
class Remap *remap;
|
||||
GridCommKokkos<DeviceType> *cg;
|
||||
GridCommKokkos<DeviceType> *cg_peratom;
|
||||
|
||||
//int **part2grid; // storage for particle -> grid mapping
|
||||
typename AT::t_int_1d_3 d_part2grid;
|
||||
|
||||
//double *boxlo;
|
||||
double boxlo[3];
|
||||
|
||||
void set_grid_global();
|
||||
void set_grid_local();
|
||||
void adjust_gewald();
|
||||
double newton_raphson_f();
|
||||
double derivf();
|
||||
double final_accuracy();
|
||||
|
||||
virtual void allocate();
|
||||
virtual void allocate_peratom();
|
||||
virtual void deallocate();
|
||||
virtual void deallocate_peratom();
|
||||
int factorable(int);
|
||||
double compute_df_kspace();
|
||||
double estimate_ik_error(double, double, bigint);
|
||||
virtual void compute_gf_denom();
|
||||
virtual void compute_gf_ik();
|
||||
|
||||
virtual void particle_map();
|
||||
virtual void make_rho();
|
||||
virtual void brick2fft();
|
||||
|
||||
virtual void poisson();
|
||||
virtual void poisson_ik();
|
||||
|
||||
virtual void fieldforce();
|
||||
virtual void fieldforce_ik();
|
||||
|
||||
virtual void poisson_peratom();
|
||||
virtual void fieldforce_peratom();
|
||||
void procs2grid2d(int,int,int,int *, int*);
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void compute_rho1d(const int i, const FFT_SCALAR &, const FFT_SCALAR &,
|
||||
const FFT_SCALAR &) const;
|
||||
void compute_rho_coeff();
|
||||
void slabcorr();
|
||||
|
||||
// grid communication
|
||||
|
||||
virtual void pack_forward_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int);
|
||||
virtual void unpack_forward_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int);
|
||||
virtual void pack_reverse_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int);
|
||||
virtual void unpack_reverse_kokkos(int, Kokkos::DualView<FFT_SCALAR*,Kokkos::LayoutRight,LMPDeviceType> &, int, DAT::tdual_int_2d &, int);
|
||||
|
||||
// triclinic
|
||||
|
||||
int triclinic; // domain settings, orthog or triclinic
|
||||
void setup_triclinic();
|
||||
void compute_gf_ik_triclinic();
|
||||
void poisson_ik_triclinic();
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
denominator for Hockney-Eastwood Green's function
|
||||
of x,y,z = sin(kx*deltax/2), etc
|
||||
|
||||
inf n-1
|
||||
S(n,k) = Sum W(k+pi*j)**2 = Sum b(l)*(z*z)**l
|
||||
j=-inf l=0
|
||||
|
||||
= -(z*z)**n /(2n-1)! * (d/dx)**(2n-1) cot(x) at z = sin(x)
|
||||
gf_b = denominator expansion coeffs
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
double gf_denom(const double &x, const double &y,
|
||||
const double &z) const {
|
||||
double sx,sy,sz;
|
||||
sz = sy = sx = 0.0;
|
||||
for (int l = order-1; l >= 0; l--) {
|
||||
sx = d_gf_b[l] + sx*x;
|
||||
sy = d_gf_b[l] + sy*y;
|
||||
sz = d_gf_b[l] + sz*z;
|
||||
}
|
||||
double s = sx*sy*sz;
|
||||
return s*s;
|
||||
};
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Illegal ... command
|
||||
|
||||
Self-explanatory. Check the input script syntax and compare to the
|
||||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: Cannot (yet) use PPPM with triclinic box and kspace_modify diff ad
|
||||
|
||||
This feature is not yet supported.
|
||||
|
||||
E: Cannot (yet) use PPPM with triclinic box and slab correction
|
||||
|
||||
This feature is not yet supported.
|
||||
|
||||
E: Cannot use PPPM with 2d simulation
|
||||
|
||||
The kspace style pppm cannot be used in 2d simulations. You can use
|
||||
2d PPPM in a 3d simulation; see the kspace_modify command.
|
||||
|
||||
E: PPPM can only currently be used with comm_style brick
|
||||
|
||||
This is a current restriction in LAMMPS.
|
||||
|
||||
E: Kspace style requires atom attribute q
|
||||
|
||||
The atom style defined does not have these attributes.
|
||||
|
||||
E: Cannot use nonperiodic boundaries with PPPM
|
||||
|
||||
For kspace style pppm, all 3 dimensions must have periodic boundaries
|
||||
unless you use the kspace_modify command to define a 2d slab with a
|
||||
non-periodic z dimension.
|
||||
|
||||
E: Incorrect boundaries with slab PPPM
|
||||
|
||||
Must have periodic x,y dimensions and non-periodic z dimension to use
|
||||
2d slab option with PPPM.
|
||||
|
||||
E: PPPM order cannot be < 2 or > than %d
|
||||
|
||||
This is a limitation of the PPPM implementation in LAMMPS.
|
||||
|
||||
E: KSpace style is incompatible with Pair style
|
||||
|
||||
Setting a kspace style requires that a pair style with matching
|
||||
long-range Coulombic or dispersion components be used.
|
||||
|
||||
E: Pair style is incompatible with TIP4P KSpace style
|
||||
|
||||
The pair style does not have the requires TIP4P settings.
|
||||
|
||||
E: Bond and angle potentials must be defined for TIP4P
|
||||
|
||||
Cannot use TIP4P pair potential unless bond and angle potentials
|
||||
are defined.
|
||||
|
||||
E: Bad TIP4P angle type for PPPM/TIP4P
|
||||
|
||||
Specified angle type is not valid.
|
||||
|
||||
E: Bad TIP4P bond type for PPPM/TIP4P
|
||||
|
||||
Specified bond type is not valid.
|
||||
|
||||
E: Cannot (yet) use PPPM with triclinic box and TIP4P
|
||||
|
||||
This feature is not yet supported.
|
||||
|
||||
W: Reducing PPPM order b/c stencil extends beyond nearest neighbor processor
|
||||
|
||||
This may lead to a larger grid than desired. See the kspace_modify overlap
|
||||
command to prevent changing of the PPPM order.
|
||||
|
||||
E: PPPM order < minimum allowed order
|
||||
|
||||
The default minimum order is 2. This can be reset by the
|
||||
kspace_modify minorder command.
|
||||
|
||||
E: PPPM grid stencil extends beyond nearest neighbor processor
|
||||
|
||||
This is not allowed if the kspace_modify overlap setting is no.
|
||||
|
||||
E: KSpace accuracy must be > 0
|
||||
|
||||
The kspace accuracy designated in the input must be greater than zero.
|
||||
|
||||
E: Could not compute grid size
|
||||
|
||||
The code is unable to compute a grid size consistent with the desired
|
||||
accuracy. This error should not occur for typical problems. Please
|
||||
send an email to the developers.
|
||||
|
||||
E: PPPM grid is too large
|
||||
|
||||
The global PPPM grid is larger than OFFSET in one or more dimensions.
|
||||
OFFSET is currently set to 4096. You likely need to decrease the
|
||||
requested accuracy.
|
||||
|
||||
E: Could not compute g_ewald
|
||||
|
||||
The Newton-Raphson solver failed to converge to a good value for
|
||||
g_ewald. This error should not occur for typical problems. Please
|
||||
send an email to the developers.
|
||||
|
||||
E: Non-numeric box dimensions - simulation unstable
|
||||
|
||||
The box size has apparently blown up.
|
||||
|
||||
E: Out of range atoms - cannot compute PPPM
|
||||
|
||||
One or more atoms are attempting to map their charge to a PPPM grid
|
||||
point that is not owned by a processor. This is likely for one of two
|
||||
reasons, both of them bad. First, it may mean that an atom near the
|
||||
boundary of a processor's sub-domain has moved more than 1/2 the
|
||||
"neighbor skin distance"_neighbor.html without neighbor lists being
|
||||
rebuilt and atoms being migrated to new processors. This also means
|
||||
you may be missing pairwise interactions that need to be computed.
|
||||
The solution is to change the re-neighboring criteria via the
|
||||
"neigh_modify"_neigh_modify command. The safest settings are "delay 0
|
||||
every 1 check yes". Second, it may mean that an atom has moved far
|
||||
outside a processor's sub-domain or even the entire simulation box.
|
||||
This indicates bad physics, e.g. due to highly overlapping atoms, too
|
||||
large a timestep, etc.
|
||||
|
||||
*/
|
|
@ -152,6 +152,8 @@ PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
|
|||
|
||||
PPPM::~PPPM()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
delete [] factors;
|
||||
deallocate();
|
||||
if (peratom_allocate_flag) deallocate_peratom();
|
||||
|
|
|
@ -1,5 +1,25 @@
|
|||
Smooth Mach Dynamics -- Smooth Particle Hydrodynamics for solids and fluids
|
||||
|
||||
Currently, the package has the following features:
|
||||
|
||||
Does liquids via traditional Smooth Particle Hydrodynamics (SPH)
|
||||
|
||||
Also solves solids mechanics problems via a state of the art
|
||||
stabilized meshless method with hourglass control.
|
||||
|
||||
Can specify hydrostatic interactions independently from material
|
||||
strength models, i.e. pressure and deviatoric stresses are separated.
|
||||
|
||||
Many material models available (Johnson-Cook, plasticity with
|
||||
hardening, Mie-Grueneisen, Polynomial EOS). Easy to add new material
|
||||
models.
|
||||
|
||||
Rigid boundary conditions (walls) can be loaded as surface geometries
|
||||
from *.STL files.
|
||||
|
||||
See the file doc/PDF/SMD_LAMMPS_userguide.pdf to get started.
|
||||
|
||||
There are example scripts for using this package in examples/USER/smd.
|
||||
The person who created this package is Georg Ganzenmuller at the
|
||||
Fraunhofer-Institute for High-Speed Dynamics, Ernst Mach Institute in
|
||||
Germany (georg.ganzenmueller at emi.fhg.de). Contact him directly if
|
||||
|
|
|
@ -87,6 +87,7 @@ class ModifyKokkos : public Modify {
|
|||
class DAT {
|
||||
public:
|
||||
typedef double tdual_xfloat_1d;
|
||||
typedef double tdual_FFT_SCALAR_1d;
|
||||
typedef int t_int_1d;
|
||||
typedef int tdual_int_2d;
|
||||
};
|
||||
|
|
|
@ -94,6 +94,7 @@ KSpace::KSpace(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
|||
execution_space = Host;
|
||||
datamask_read = ALL_MASK;
|
||||
datamask_modify = ALL_MASK;
|
||||
copymode = 0;
|
||||
|
||||
memory->create(gcons,7,7,"kspace:gcons");
|
||||
gcons[2][0] = 15.0 / 8.0;
|
||||
|
@ -149,6 +150,8 @@ KSpace::KSpace(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
|||
|
||||
KSpace::~KSpace()
|
||||
{
|
||||
if (copymode) return;
|
||||
|
||||
memory->destroy(eatom);
|
||||
memory->destroy(vatom);
|
||||
memory->destroy(gcons);
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#define LMP_KSPACE_H
|
||||
|
||||
#include "pointers.h"
|
||||
#include "accelerator_kokkos.h"
|
||||
|
||||
#ifdef FFT_SINGLE
|
||||
typedef float FFT_SCALAR;
|
||||
|
@ -85,6 +86,7 @@ class KSpace : protected Pointers {
|
|||
// KOKKOS host/device flag and data masks
|
||||
ExecutionSpace execution_space;
|
||||
unsigned int datamask_read,datamask_modify;
|
||||
int copymode;
|
||||
|
||||
int compute_flag; // 0 if skip compute()
|
||||
int fftbench; // 0 if skip FFT timing
|
||||
|
@ -124,6 +126,11 @@ class KSpace : protected Pointers {
|
|||
virtual void pack_reverse(int, FFT_SCALAR *, int, int *) {};
|
||||
virtual void unpack_reverse(int, FFT_SCALAR *, int, int *) {};
|
||||
|
||||
virtual void pack_forward_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
||||
virtual void unpack_forward_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
||||
virtual void pack_reverse_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
||||
virtual void unpack_reverse_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
||||
|
||||
virtual int timing(int, double &, double &) {return 0;}
|
||||
virtual int timing_1d(int, double &) {return 0;}
|
||||
virtual int timing_3d(int, double &) {return 0;}
|
||||
|
|
Loading…
Reference in New Issue