git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6622 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2011-08-08 19:40:03 +00:00
parent a54f7e6b5b
commit 8745580d5d
10 changed files with 944 additions and 948 deletions

View File

@ -43,6 +43,14 @@ using namespace LAMMPS_NS;
#define LARGE 10000.0
#define EPS_HOC 1.0e-7
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#define ONEF 1.0f
#else
#define ZEROF 0.0
#define ONEF 1.0
#endif
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#define MAX(a,b) ((a) > (b) ? (a) : (b))
@ -50,7 +58,7 @@ using namespace LAMMPS_NS;
PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
{
if (narg != 1) error->all("Illegal kspace_style pppm command");
if (narg < 1) error->all("Illegal kspace_style pppm command");
precision = atof(arg[0]);
PI = 4.0*atan(1.0);
@ -754,7 +762,7 @@ void PPPM::allocate()
// summation coeffs
gf_b = new double[order];
memory->create(gf_b,order,"pppm:gf_b");
memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
@ -778,7 +786,7 @@ void PPPM::allocate()
remap = new Remap(lmp,world,
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
1,0,0,2);
1,0,0,FFT_PRECISION);
}
/* ----------------------------------------------------------------------
@ -805,7 +813,7 @@ void PPPM::deallocate()
memory->destroy(buf1);
memory->destroy(buf2);
delete [] gf_b;
memory->destroy(gf_b);
memory->destroy2d_offset(rho1d,-order/2);
memory->destroy2d_offset(rho_coeff,(1-order)/2);
@ -967,17 +975,24 @@ void PPPM::set_grid()
// print info
if (me == 0) {
#ifdef FFT_SINGLE
const char fft_prec[] = "single";
#else
const char fft_prec[] = "double";
#endif
if (screen) {
fprintf(screen," G vector = %g\n",g_ewald);
fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
fprintf(screen," stencil order = %d\n",order);
fprintf(screen," RMS precision = %g\n",MAX(lpr,spr));
fprintf(screen," using %s precision FFTs\n",fft_prec);
}
if (logfile) {
fprintf(logfile," G vector = %g\n",g_ewald);
fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
fprintf(logfile," stencil order = %d\n",order);
fprintf(logfile," RMS precision = %g\n",MAX(lpr,spr));
fprintf(logfile," using %s precision FFTs\n",fft_prec);
}
}
}
@ -1036,7 +1051,7 @@ double PPPM::diffpr(double hx, double hy, double hz, double q2, double **acons)
lprz = rms(hz,zprd*slab_volfactor,natoms,q2,acons);
kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) /
sqrt(natoms*cutoff*xprd*yprd*zprd);
sqrt(static_cast<double>(natoms)*cutoff*xprd*yprd*zprd);
double value = kspace_prec - real_prec;
return value;
}
@ -1113,8 +1128,8 @@ void PPPM::brick2fft()
if (comm->procneigh[0][1] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i];
else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][0],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][1],0,world);
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
MPI_Wait(&request,&status);
}
@ -1137,8 +1152,8 @@ void PPPM::brick2fft()
if (comm->procneigh[0][0] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i];
else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][1],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][0],0,world);
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
MPI_Wait(&request,&status);
}
@ -1161,8 +1176,8 @@ void PPPM::brick2fft()
if (comm->procneigh[1][1] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i];
else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][0],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][1],0,world);
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
MPI_Wait(&request,&status);
}
@ -1185,8 +1200,8 @@ void PPPM::brick2fft()
if (comm->procneigh[1][0] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i];
else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][1],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][0],0,world);
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
MPI_Wait(&request,&status);
}
@ -1209,8 +1224,8 @@ void PPPM::brick2fft()
if (comm->procneigh[2][1] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i];
else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][0],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][1],0,world);
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
MPI_Wait(&request,&status);
}
@ -1233,8 +1248,8 @@ void PPPM::brick2fft()
if (comm->procneigh[2][0] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i];
else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][1],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][0],0,world);
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
MPI_Wait(&request,&status);
}
@ -1284,8 +1299,8 @@ void PPPM::fillbrick()
if (comm->procneigh[2][1] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i];
else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][0],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][1],0,world);
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
MPI_Wait(&request,&status);
}
@ -1314,8 +1329,8 @@ void PPPM::fillbrick()
if (comm->procneigh[2][0] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i];
else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][1],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][0],0,world);
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
MPI_Wait(&request,&status);
}
@ -1344,8 +1359,8 @@ void PPPM::fillbrick()
if (comm->procneigh[1][1] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i];
else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][0],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][1],0,world);
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
MPI_Wait(&request,&status);
}
@ -1374,8 +1389,8 @@ void PPPM::fillbrick()
if (comm->procneigh[1][0] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i];
else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][1],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][0],0,world);
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
MPI_Wait(&request,&status);
}
@ -1404,8 +1419,8 @@ void PPPM::fillbrick()
if (comm->procneigh[0][1] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i];
else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][0],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][1],0,world);
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
MPI_Wait(&request,&status);
}
@ -1434,8 +1449,8 @@ void PPPM::fillbrick()
if (comm->procneigh[0][0] == me)
for (i = 0; i < n; i++) buf2[i] = buf1[i];
else {
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][1],0,world,&request);
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][0],0,world);
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
MPI_Wait(&request,&status);
}
@ -1497,12 +1512,12 @@ void PPPM::particle_map()
void PPPM::make_rho()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
double dx,dy,dz,x0,y0,z0;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
// clear 3d density array
double *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (i = 0; i < ngrid; i++) vec[i] = 0.0;
FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
@ -1554,7 +1569,7 @@ void PPPM::poisson(int eflag, int vflag)
n = 0;
for (i = 0; i < nfft; i++) {
work1[n++] = density_fft[i];
work1[n++] = 0.0;
work1[n++] = ZEROF;
}
fft1->compute(work1,work1,1);
@ -1667,8 +1682,8 @@ void PPPM::poisson(int eflag, int vflag)
void PPPM::fieldforce()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
double dx,dy,dz,x0,y0,z0;
double ek[3];
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
@ -1679,6 +1694,7 @@ void PPPM::fieldforce()
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int nlocal = atom->nlocal;
for (i = 0; i < nlocal; i++) {
@ -1691,7 +1707,7 @@ void PPPM::fieldforce()
compute_rho1d(dx,dy,dz);
ek[0] = ek[1] = ek[2] = 0.0;
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
@ -1701,18 +1717,18 @@ void PPPM::fieldforce()
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
ek[0] -= x0*vdx_brick[mz][my][mx];;
ek[1] -= x0*vdy_brick[mz][my][mx];;
ek[2] -= x0*vdz_brick[mz][my][mx];;
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
f[i][0] += qqrd2e*scale * q[i]*ek[0];
f[i][1] += qqrd2e*scale * q[i]*ek[1];
f[i][2] += qqrd2e*scale * q[i]*ek[2];
const double qfactor = qqrd2e*scale*q[i];
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
f[i][2] += qfactor*ekz;
}
}
@ -1758,15 +1774,16 @@ void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
charge assignment into rho1d
dx,dy,dz = distance of particle from "lower left" grid point
------------------------------------------------------------------------- */
void PPPM::compute_rho1d(double dx, double dy, double dz)
void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
const FFT_SCALAR &dz)
{
int k,l;
for (k = (1-order)/2; k <= order/2; k++) {
rho1d[0][k] = 0.0;
rho1d[1][k] = 0.0;
rho1d[2][k] = 0.0;
rho1d[0][k] = ZEROF;
rho1d[1][k] = ZEROF;
rho1d[2][k] = ZEROF;
for (l = order-1; l >= 0; l--) {
rho1d[0][k] = rho_coeff[l][k] + rho1d[0][k]*dx;
rho1d[1][k] = rho_coeff[l][k] + rho1d[1][k]*dy;
@ -1797,9 +1814,9 @@ void PPPM::compute_rho1d(double dx, double dy, double dz)
void PPPM::compute_rho_coeff()
{
int j,k,l,m;
double s;
FFT_SCALAR s;
double **a;
FFT_SCALAR **a;
memory->create2d_offset(a,order,-order,order,"pppm:a");
for (k = -order; k <= order; k++)
@ -1812,8 +1829,13 @@ void PPPM::compute_rho_coeff()
s = 0.0;
for (l = 0; l < j; l++) {
a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
#ifdef FFT_SINGLE
s += powf(0.5,(float) l+1) *
(a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
#else
s += pow(0.5,(double) l+1) *
(a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
#endif
}
a[0][k] = s;
}
@ -1874,7 +1896,7 @@ void PPPM::timing(int n, double &time3d, double &time1d)
{
double time1,time2;
for (int i = 0; i < 2*nfft_both; i++) work1[i] = 0.0;
for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
MPI_Barrier(world);
time1 = MPI_Wtime();
@ -1914,9 +1936,10 @@ double PPPM::memory_usage()
double bytes = nmax*3 * sizeof(double);
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
(nzhi_out-nzlo_out+1);
bytes += 4 * nbrick * sizeof(double);
bytes += 4 * nbrick * sizeof(FFT_SCALAR);
bytes += 6 * nfft_both * sizeof(double);
bytes += nfft_both*6 * sizeof(double);
bytes += 2 * nbuf * sizeof(double);
bytes += nfft_both * sizeof(double);
bytes += nfft_both*5 * sizeof(FFT_SCALAR);
bytes += 2 * nbuf * sizeof(FFT_SCALAR);
return bytes;
}

View File

@ -1,4 +1,4 @@
/* ----------------------------------------------------------------------
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
@ -20,6 +20,17 @@ KSpaceStyle(pppm,PPPM)
#ifndef LMP_PPPM_H
#define LMP_PPPM_H
#include "lmptype.h"
#include "mpi.h"
#ifdef FFT_SINGLE
typedef float FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_FLOAT
#else
typedef double FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_DOUBLE
#endif
#include "kspace.h"
namespace LAMMPS_NS {
@ -28,11 +39,11 @@ class PPPM : public KSpace {
public:
PPPM(class LAMMPS *, int, char **);
virtual ~PPPM();
void init();
void setup();
void compute(int, int);
void timing(int, double &, double &);
double memory_usage();
virtual void init();
virtual void setup();
virtual void compute(int, int);
virtual void timing(int, double &, double &);
virtual double memory_usage();
protected:
int me,nprocs;
@ -54,17 +65,17 @@ class PPPM : public KSpace {
int nlower,nupper;
int ngrid,nfft,nbuf,nfft_both;
double ***density_brick;
double ***vdx_brick,***vdy_brick,***vdz_brick;
FFT_SCALAR ***density_brick;
FFT_SCALAR ***vdx_brick,***vdy_brick,***vdz_brick;
double *greensfn;
double **vg;
double *fkx,*fky,*fkz;
double *density_fft;
double *work1,*work2;
double *buf1,*buf2;
FFT_SCALAR *density_fft;
FFT_SCALAR *work1,*work2;
FFT_SCALAR *buf1,*buf2;
double *gf_b;
double **rho1d,**rho_coeff;
FFT_SCALAR **rho1d,**rho_coeff;
class FFT3d *fft1,*fft2;
class Remap *remap;
@ -80,8 +91,8 @@ class PPPM : public KSpace {
double alpha; // geometric factor
void set_grid();
void allocate();
void deallocate();
virtual void allocate();
virtual void deallocate();
int factorable(int);
double rms(double, double, bigint, double, double **);
double diffpr(double, double, double, double, double **);
@ -89,12 +100,13 @@ class PPPM : public KSpace {
double gf_denom(double, double, double);
virtual void particle_map();
virtual void make_rho();
void brick2fft();
void fillbrick();
void poisson(int, int);
virtual void brick2fft();
virtual void fillbrick();
virtual void poisson(int, int);
virtual void fieldforce();
void procs2grid2d(int,int,int,int *, int*);
void compute_rho1d(double, double, double);
void compute_rho1d(const FFT_SCALAR &, const FFT_SCALAR &,
const FFT_SCALAR &);
void compute_rho_coeff();
void slabcorr(int);
};

View File

@ -26,6 +26,14 @@ using namespace LAMMPS_NS;
#define OFFSET 16384
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#define ONEF 1.0f
#else
#define ZEROF 0.0
#define ONEF 1.0
#endif
/* ---------------------------------------------------------------------- */
PPPMTIP4P::PPPMTIP4P(LAMMPS *lmp, int narg, char **arg) :
@ -87,13 +95,13 @@ void PPPMTIP4P::particle_map()
void PPPMTIP4P::make_rho()
{
int i,l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2;
double dx,dy,dz,x0,y0,z0;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
double *xi,xM[3];
// clear 3d density array
double *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (i = 0; i < ngrid; i++) vec[i] = 0.0;
FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
// loop over my charges, add their contribution to nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
@ -143,13 +151,13 @@ void PPPMTIP4P::make_rho()
void PPPMTIP4P::fieldforce()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
double dx,dy,dz,x0,y0,z0;
double ek[3];
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
double *xi;
int iH1,iH2;
double xM[3];
double fx,fy,fz;
double ddotf, rOM[3], f1[3];
double ddotf, rOMx, rOMy, rOMz, f1x, f1y, f1z;
// loop over my charges, interpolate electric field from nearby grid points
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
@ -160,6 +168,7 @@ void PPPMTIP4P::fieldforce()
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
int *type = atom->type;
int nlocal = atom->nlocal;
@ -178,7 +187,7 @@ void PPPMTIP4P::fieldforce()
compute_rho1d(dx,dy,dz);
ek[0] = ek[1] = ek[2] = 0.0;
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
@ -188,47 +197,47 @@ void PPPMTIP4P::fieldforce()
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
ek[0] -= x0*vdx_brick[mz][my][mx];
ek[1] -= x0*vdy_brick[mz][my][mx];
ek[2] -= x0*vdz_brick[mz][my][mx];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
// convert E-field to force
const double qfactor = qqrd2e*scale*q[i];
if (type[i] != typeO) {
f[i][0] += qqrd2e*q[i]*ek[0];
f[i][1] += qqrd2e*q[i]*ek[1];
f[i][2] += qqrd2e*q[i]*ek[2];
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
f[i][2] += qfactor*ekz;
} else {
fx = qqrd2e * q[i] * ek[0];
fy = qqrd2e * q[i] * ek[1];
fz = qqrd2e * q[i] * ek[2];
fx = qfactor * ekx;
fy = qfactor * eky;
fz = qfactor * ekz;
find_M(i,iH1,iH2,xM);
rOM[0] = xM[0] - x[i][0];
rOM[1] = xM[1] - x[i][1];
rOM[2] = xM[2] - x[i][2];
rOMx = xM[0] - x[i][0];
rOMy = xM[1] - x[i][1];
rOMz = xM[2] - x[i][2];
ddotf = (rOM[0] * fx + rOM[1] * fy + rOM[2] * fz) / (qdist * qdist);
ddotf = (rOMx * fx + rOMy * fy + rOMz * fz) / (qdist * qdist);
f1[0] = ddotf * rOM[0];
f1[1] = ddotf * rOM[1];
f1[2] = ddotf * rOM[2];
f1x = ddotf * rOMx;
f1y = ddotf * rOMy;
f1z = ddotf * rOMz;
f[i][0] += fx - alpha * (fx - f1[0]);
f[i][1] += fy - alpha * (fy - f1[1]);
f[i][2] += fz - alpha * (fz - f1[2]);
f[i][0] += fx - alpha * (fx - f1x);
f[i][1] += fy - alpha * (fy - f1y);
f[i][2] += fz - alpha * (fz - f1z);
f[iH1][0] += 0.5*alpha*(fx - f1[0]);
f[iH1][1] += 0.5*alpha*(fy - f1[1]);
f[iH1][2] += 0.5*alpha*(fz - f1[2]);
f[iH1][0] += 0.5*alpha*(fx - f1x);
f[iH1][1] += 0.5*alpha*(fy - f1y);
f[iH1][2] += 0.5*alpha*(fz - f1z);
f[iH2][0] += 0.5*alpha*(fx - f1[0]);
f[iH2][1] += 0.5*alpha*(fy - f1[1]);
f[iH2][2] += 0.5*alpha*(fz - f1[2]);
f[iH2][0] += 0.5*alpha*(fx - f1x);
f[iH2][1] += 0.5*alpha*(fy - f1y);
f[iH2][2] += 0.5*alpha*(fz - f1z);
}
}
}

View File

@ -27,12 +27,14 @@ namespace LAMMPS_NS {
class PPPMTIP4P : public PPPM {
public:
PPPMTIP4P(class LAMMPS *, int, char **);
virtual ~PPPMTIP4P () {};
protected:
virtual void particle_map();
virtual void make_rho();
virtual void fieldforce();
private:
void particle_map();
void make_rho();
void fieldforce();
void find_M(int, int &, int &, double *);
};

View File

@ -11,10 +11,12 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "mpi.h"
#include "stdio.h"
#include "stdlib.h"
#include "remap.h"
#define PACK_DATA FFT_SCALAR
#include "pack.h"
#define MIN(A,B) ((A) < (B)) ? (A) : (B)
@ -57,13 +59,13 @@
plan plan returned by previous call to remap_3d_create_plan
------------------------------------------------------------------------- */
void remap_3d(double *in, double *out, double *buf,
void remap_3d(FFT_SCALAR *in, FFT_SCALAR *out, FFT_SCALAR *buf,
struct remap_plan_3d *plan)
{
MPI_Status status;
int i,isend,irecv;
double *scratch;
FFT_SCALAR *scratch;
if (plan->memory == 0)
scratch = buf;
@ -74,7 +76,7 @@ void remap_3d(double *in, double *out, double *buf,
for (irecv = 0; irecv < plan->nrecv; irecv++)
MPI_Irecv(&scratch[plan->recv_bufloc[irecv]],plan->recv_size[irecv],
MPI_DOUBLE,plan->recv_proc[irecv],0,
MPI_FFT_SCALAR,plan->recv_proc[irecv],0,
plan->comm,&plan->request[irecv]);
// send all messages to other procs
@ -82,7 +84,7 @@ void remap_3d(double *in, double *out, double *buf,
for (isend = 0; isend < plan->nsend; isend++) {
plan->pack(&in[plan->send_offset[isend]],
plan->sendbuf,&plan->packplan[isend]);
MPI_Send(plan->sendbuf,plan->send_size[isend],MPI_DOUBLE,
MPI_Send(plan->sendbuf,plan->send_size[isend],MPI_FFT_SCALAR,
plan->send_proc[isend],0,plan->comm);
}
@ -150,13 +152,6 @@ struct remap_plan_3d *remap_3d_create_plan(
MPI_Comm_rank(comm,&me);
MPI_Comm_size(comm,&nprocs);
// single precision not yet supported
if (precision == 1) {
if (me == 0) printf("Single precision not supported\n");
return NULL;
}
// allocate memory for plan data struct
plan = (struct remap_plan_3d *) malloc(sizeof(struct remap_plan_3d));
@ -209,10 +204,7 @@ struct remap_plan_3d *remap_3d_create_plan(
// malloc space for send info
if (nsend) {
if (precision == 1)
plan->pack = NULL;
else
plan->pack = pack_3d;
plan->pack = pack_3d;
plan->send_offset = (int *) malloc(nsend*sizeof(int));
plan->send_size = (int *) malloc(nsend*sizeof(int));
@ -272,45 +264,23 @@ struct remap_plan_3d *remap_3d_create_plan(
// malloc space for recv info
if (nrecv) {
if (precision == 1) {
if (permute == 0)
plan->unpack = NULL;
else if (permute == 1) {
if (nqty == 1)
plan->unpack = NULL;
else if (nqty == 2)
plan->unpack = NULL;
else
plan->unpack = NULL;
}
else if (permute == 2) {
if (nqty == 1)
plan->unpack = NULL;
else if (nqty == 2)
plan->unpack = NULL;
else
plan->unpack = NULL;
}
if (permute == 0)
plan->unpack = unpack_3d;
else if (permute == 1) {
if (nqty == 1)
plan->unpack = unpack_3d_permute1_1;
else if (nqty == 2)
plan->unpack = unpack_3d_permute1_2;
else
plan->unpack = unpack_3d_permute1_n;
}
else if (precision == 2) {
if (permute == 0)
plan->unpack = unpack_3d;
else if (permute == 1) {
if (nqty == 1)
plan->unpack = unpack_3d_permute1_1;
else if (nqty == 2)
plan->unpack = unpack_3d_permute1_2;
else
plan->unpack = unpack_3d_permute1_n;
}
else if (permute == 2) {
if (nqty == 1)
plan->unpack = unpack_3d_permute2_1;
else if (nqty == 2)
plan->unpack = unpack_3d_permute2_2;
else
plan->unpack = unpack_3d_permute2_n;
}
else if (permute == 2) {
if (nqty == 1)
plan->unpack = unpack_3d_permute2_1;
else if (nqty == 2)
plan->unpack = unpack_3d_permute2_2;
else
plan->unpack = unpack_3d_permute2_n;
}
plan->recv_offset = (int *) malloc(nrecv*sizeof(int));
@ -408,10 +378,7 @@ struct remap_plan_3d *remap_3d_create_plan(
size = MAX(size,plan->send_size[nsend]);
if (size) {
if (precision == 1)
plan->sendbuf = NULL;
else
plan->sendbuf = (double *) malloc(size*sizeof(double));
plan->sendbuf = (FFT_SCALAR *) malloc(size*sizeof(FFT_SCALAR));
if (plan->sendbuf == NULL) return NULL;
}
@ -422,11 +389,8 @@ struct remap_plan_3d *remap_3d_create_plan(
if (memory == 1) {
if (nrecv > 0) {
if (precision == 1)
plan->scratch = NULL;
else
plan->scratch =
(double *) malloc(nqty*out.isize*out.jsize*out.ksize*sizeof(double));
plan->scratch =
(FFT_SCALAR *) malloc(nqty*out.isize*out.jsize*out.ksize*sizeof(FFT_SCALAR));
if (plan->scratch == NULL) return NULL;
}
}

View File

@ -1,4 +1,4 @@
/* ----------------------------------------------------------------------
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
@ -11,14 +11,24 @@
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include <mpi.h>
#ifdef FFT_SINGLE
typedef float FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_FLOAT
#else
typedef double FFT_SCALAR;
#define MPI_FFT_SCALAR MPI_DOUBLE
#endif
// details of how to do a 3d remap
struct remap_plan_3d {
double *sendbuf; // buffer for MPI sends
double *scratch; // scratch buffer for MPI recvs
void (*pack)(double *, double *, struct pack_plan_3d *);
FFT_SCALAR *sendbuf; // buffer for MPI sends
FFT_SCALAR *scratch; // scratch buffer for MPI recvs
void (*pack)(FFT_SCALAR *, FFT_SCALAR *, struct pack_plan_3d *);
// which pack function to use
void (*unpack)(double *, double *, struct pack_plan_3d *);
void (*unpack)(FFT_SCALAR *, FFT_SCALAR *, struct pack_plan_3d *);
// which unpack function to use
int *send_offset; // extraction loc for each send
int *send_size; // size of each send message
@ -47,7 +57,7 @@ struct extent_3d {
// function prototypes
void remap_3d(double *, double *, double *, struct remap_plan_3d *);
void remap_3d(FFT_SCALAR *, FFT_SCALAR *, FFT_SCALAR *, struct remap_plan_3d *);
struct remap_plan_3d *remap_3d_create_plan(MPI_Comm,
int, int, int, int, int, int, int, int, int, int, int, int,
int, int, int, int);

View File

@ -42,7 +42,7 @@ Remap::~Remap()
/* ---------------------------------------------------------------------- */
void Remap::perform(double *in, double *out, double *buf)
void Remap::perform(FFT_SCALAR *in, FFT_SCALAR *out, FFT_SCALAR *buf)
{
remap_3d(in,out,buf,plan);
}

View File

@ -1,4 +1,4 @@
/* ----------------------------------------------------------------------
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
@ -24,7 +24,7 @@ class Remap : protected Pointers {
Remap(class LAMMPS *, MPI_Comm,int,int,int,int,int,int,
int,int,int,int,int,int,int,int,int,int);
~Remap();
void perform(double *, double *, double *);
void perform(FFT_SCALAR *, FFT_SCALAR *, FFT_SCALAR *);
private:
struct remap_plan_3d *plan;

View File

@ -1,757 +0,0 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#include "pack.h"
#if !defined(PACK_POINTER) && !defined(PACK_MEMCPY)
#define PACK_ARRAY
#endif
/* ----------------------------------------------------------------------
Pack and unpack functions:
pack routines copy strided values from data into contiguous locs in buf
unpack routines copy contiguous values from buf into strided locs in data
different versions of unpack depending on permutation
and # of values/element
PACK_ARRAY routines work via array indices (default)
PACK_POINTER routines work via pointers
PACK_MEMCPY routines work via pointers and memcpy function
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
pack/unpack with array indices
------------------------------------------------------------------------- */
#ifdef PACK_ARRAY
/* ----------------------------------------------------------------------
pack from data -> buf
------------------------------------------------------------------------- */
void pack_3d(double *data, double *buf, struct pack_plan_3d *plan)
{
register int in,out,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
in = 0;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_plane;
for (mid = 0; mid < nmid; mid++) {
out = plane + mid*nstride_line;
for (fast = 0; fast < nfast; fast++)
buf[in++] = data[out++];
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data
------------------------------------------------------------------------- */
void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan)
{
register int in,out,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = 0;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_plane;
for (mid = 0; mid < nmid; mid++) {
in = plane + mid*nstride_line;
for (fast = 0; fast < nfast; fast++)
data[in++] = buf[out++];
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 1 value/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan)
{
register int in,out,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = 0;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
in = plane + mid;
for (fast = 0; fast < nfast; fast++, in += nstride_plane)
data[in] = buf[out++];
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 2 values/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan)
{
register int in,out,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = 0;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
in = plane + 2*mid;
for (fast = 0; fast < nfast; fast++, in += nstride_plane) {
data[in] = buf[out++];
data[in+1] = buf[out++];
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, nqty values/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan)
{
register int in,out,iqty,instart,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
nqty = plan->nqty;
out = 0;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
instart = plane + nqty*mid;
for (fast = 0; fast < nfast; fast++, instart += nstride_plane) {
in = instart;
for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++];
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 1 value/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan)
{
register int in,out,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = 0;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
in = slow + mid*nstride_plane;
for (fast = 0; fast < nfast; fast++, in += nstride_line)
data[in] = buf[out++];
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 2 values/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan)
{
register int in,out,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = 0;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
in = 2*slow + mid*nstride_plane;
for (fast = 0; fast < nfast; fast++, in += nstride_line) {
data[in] = buf[out++];
data[in+1] = buf[out++];
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, nqty values/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan)
{
register int in,out,iqty,instart,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
nqty = plan->nqty;
out = 0;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
instart = nqty*slow + mid*nstride_plane;
for (fast = 0; fast < nfast; fast++, instart += nstride_line) {
in = instart;
for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++];
}
}
}
}
#endif
/* ----------------------------------------------------------------------
pack/unpack with pointers
------------------------------------------------------------------------- */
#ifdef PACK_POINTER
/* ----------------------------------------------------------------------
pack from data -> buf
------------------------------------------------------------------------- */
void pack_3d(double *data, double *buf, struct pack_plan_3d *plan)
{
register double *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
in = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_plane;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+mid*nstride_line]);
end = begin + nfast;
for (out = begin; out < end; out++)
*(in++) = *out;
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data
------------------------------------------------------------------------- */
void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_plane;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+mid*nstride_line]);
end = begin + nfast;
for (in = begin; in < end; in++)
*in = *(out++);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 1 value/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+mid]);
end = begin + nfast*nstride_plane;
for (in = begin; in < end; in += nstride_plane)
*in = *(out++);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 2 values/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+2*mid]);
end = begin + nfast*nstride_plane;
for (in = begin; in < end; in += nstride_plane) {
*in = *(out++);
*(in+1) = *(out++);
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, nqty values/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out,*instart,*begin,*end;
register int iqty,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
nqty = plan->nqty;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+nqty*mid]);
end = begin + nfast*nstride_plane;
for (instart = begin; instart < end; instart += nstride_plane) {
in = instart;
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 1 value/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
begin = &(data[slow+mid*nstride_plane]);
end = begin + nfast*nstride_line;
for (in = begin; in < end; in += nstride_line)
*in = *(out++);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 2 values/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
begin = &(data[2*slow+mid*nstride_plane]);
end = begin + nfast*nstride_line;
for (in = begin; in < end; in += nstride_line) {
*in = *(out++);
*(in+1) = *(out++);
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, nqty values/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out,*instart,*begin,*end;
register int iqty,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
nqty = plan->nqty;
out = buf;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
begin = &(data[nqty*slow+mid*nstride_plane]);
end = begin + nfast*nstride_line;
for (instart = begin; instart < end; instart += nstride_line) {
in = instart;
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
}
}
}
}
#endif
/* ----------------------------------------------------------------------
pack/unpack with pointers and memcpy function
no memcpy version of unpack_permute routines,
just use PACK_POINTER versions
------------------------------------------------------------------------- */
#ifdef PACK_MEMCPY
/* ----------------------------------------------------------------------
pack from data -> buf
------------------------------------------------------------------------- */
void pack_3d(double *data, double *buf, struct pack_plan_3d *plan)
{
register double *in,*out;
register int mid,slow,size;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
size = nfast*sizeof(double);
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_plane;
upto = slow*nmid*nfast;
for (mid = 0; mid < nmid; mid++) {
in = &(buf[upto+mid*nfast]);
out = &(data[plane+mid*nstride_line]);
memcpy(in,out,size);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data
------------------------------------------------------------------------- */
void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out;
register int mid,slow,size;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
size = nfast*sizeof(double);
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_plane;
upto = slow*nmid*nfast;
for (mid = 0; mid < nmid; mid++) {
in = &(data[plane+mid*nstride_line]);
out = &(buf[upto+mid*nfast]);
memcpy(in,out,size);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 1 value/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+mid]);
end = begin + nfast*nstride_plane;
for (in = begin; in < end; in += nstride_plane)
*in = *(out++);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 2 values/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+2*mid]);
end = begin + nfast*nstride_plane;
for (in = begin; in < end; in += nstride_plane) {
*in = *(out++);
*(in+1) = *(out++);
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, nqty values/element
------------------------------------------------------------------------- */
void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out,*instart,*begin,*end;
register int iqty,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
nqty = plan->nqty;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+nqty*mid]);
end = begin + nfast*nstride_plane;
for (instart = begin; instart < end; instart += nstride_plane) {
in = instart;
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 1 value/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
begin = &(data[slow+mid*nstride_plane]);
end = begin + nfast*nstride_line;
for (in = begin; in < end; in += nstride_line)
*in = *(out++);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 2 values/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
begin = &(data[2*slow+mid*nstride_plane]);
end = begin + nfast*nstride_line;
for (in = begin; in < end; in += nstride_line) {
*in = *(out++);
*(in+1) = *(out++);
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, nqty values/element
------------------------------------------------------------------------- */
void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan)
{
register double *in,*out,*instart,*begin,*end;
register int iqty,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
nqty = plan->nqty;
out = buf;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
begin = &(data[nqty*slow+mid*nstride_plane]);
end = begin + nfast*nstride_line;
for (instart = begin; instart < end; instart += nstride_line) {
in = instart;
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
}
}
}
}
#endif

View File

@ -1,4 +1,4 @@
/* ----------------------------------------------------------------------
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
@ -22,13 +22,746 @@ struct pack_plan_3d {
int nqty; // # of values/element
};
// function prototypes
void pack_3d(double *, double *, struct pack_plan_3d *);
void unpack_3d(double *, double *, struct pack_plan_3d *);
void unpack_3d_permute1_1(double *, double *, struct pack_plan_3d *);
void unpack_3d_permute1_2(double *, double *, struct pack_plan_3d *);
void unpack_3d_permute1_n(double *, double *, struct pack_plan_3d *);
void unpack_3d_permute2_1(double *, double *, struct pack_plan_3d *);
void unpack_3d_permute2_2(double *, double *, struct pack_plan_3d *);
void unpack_3d_permute2_n(double *, double *, struct pack_plan_3d *);
#if !defined(PACK_POINTER) && !defined(PACK_MEMCPY)
#define PACK_ARRAY
#endif
#ifndef PACK_DATA
#define PACK_DATA double
#endif
/* ----------------------------------------------------------------------
Pack and unpack functions:
pack routines copy strided values from data into contiguous locs in buf
unpack routines copy contiguous values from buf into strided locs in data
different versions of unpack depending on permutation
and # of values/element
PACK_ARRAY routines work via array indices (default)
PACK_POINTER routines work via pointers
PACK_MEMCPY routines work via pointers and memcpy function
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
pack/unpack with array indices
------------------------------------------------------------------------- */
#ifdef PACK_ARRAY
/* ----------------------------------------------------------------------
pack from data -> buf
------------------------------------------------------------------------- */
static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan)
{
register int in,out,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
in = 0;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_plane;
for (mid = 0; mid < nmid; mid++) {
out = plane + mid*nstride_line;
for (fast = 0; fast < nfast; fast++)
buf[in++] = data[out++];
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data
------------------------------------------------------------------------- */
static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register int in,out,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = 0;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_plane;
for (mid = 0; mid < nmid; mid++) {
in = plane + mid*nstride_line;
for (fast = 0; fast < nfast; fast++)
data[in++] = buf[out++];
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 1 value/element
------------------------------------------------------------------------- */
static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register int in,out,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = 0;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
in = plane + mid;
for (fast = 0; fast < nfast; fast++, in += nstride_plane)
data[in] = buf[out++];
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 2 values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register int in,out,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = 0;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
in = plane + 2*mid;
for (fast = 0; fast < nfast; fast++, in += nstride_plane) {
data[in] = buf[out++];
data[in+1] = buf[out++];
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, nqty values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register int in,out,iqty,instart,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
nqty = plan->nqty;
out = 0;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
instart = plane + nqty*mid;
for (fast = 0; fast < nfast; fast++, instart += nstride_plane) {
in = instart;
for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++];
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 1 value/element
------------------------------------------------------------------------- */
static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register int in,out,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = 0;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
in = slow + mid*nstride_plane;
for (fast = 0; fast < nfast; fast++, in += nstride_line)
data[in] = buf[out++];
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 2 values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register int in,out,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = 0;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
in = 2*slow + mid*nstride_plane;
for (fast = 0; fast < nfast; fast++, in += nstride_line) {
data[in] = buf[out++];
data[in+1] = buf[out++];
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, nqty values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register int in,out,iqty,instart,fast,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
nqty = plan->nqty;
out = 0;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
instart = nqty*slow + mid*nstride_plane;
for (fast = 0; fast < nfast; fast++, instart += nstride_line) {
in = instart;
for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++];
}
}
}
}
#endif
/* ----------------------------------------------------------------------
pack/unpack with pointers
------------------------------------------------------------------------- */
#ifdef PACK_POINTER
/* ----------------------------------------------------------------------
pack from data -> buf
------------------------------------------------------------------------- */
static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
in = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_plane;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+mid*nstride_line]);
end = begin + nfast;
for (out = begin; out < end; out++)
*(in++) = *out;
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data
------------------------------------------------------------------------- */
static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_plane;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+mid*nstride_line]);
end = begin + nfast;
for (in = begin; in < end; in++)
*in = *(out++);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 1 value/element
------------------------------------------------------------------------- */
static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+mid]);
end = begin + nfast*nstride_plane;
for (in = begin; in < end; in += nstride_plane)
*in = *(out++);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 2 values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+2*mid]);
end = begin + nfast*nstride_plane;
for (in = begin; in < end; in += nstride_plane) {
*in = *(out++);
*(in+1) = *(out++);
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, nqty values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*instart,*begin,*end;
register int iqty,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
nqty = plan->nqty;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+nqty*mid]);
end = begin + nfast*nstride_plane;
for (instart = begin; instart < end; instart += nstride_plane) {
in = instart;
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 1 value/element
------------------------------------------------------------------------- */
static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
begin = &(data[slow+mid*nstride_plane]);
end = begin + nfast*nstride_line;
for (in = begin; in < end; in += nstride_line)
*in = *(out++);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 2 values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
begin = &(data[2*slow+mid*nstride_plane]);
end = begin + nfast*nstride_line;
for (in = begin; in < end; in += nstride_line) {
*in = *(out++);
*(in+1) = *(out++);
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, nqty values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*instart,*begin,*end;
register int iqty,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
nqty = plan->nqty;
out = buf;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
begin = &(data[nqty*slow+mid*nstride_plane]);
end = begin + nfast*nstride_line;
for (instart = begin; instart < end; instart += nstride_line) {
in = instart;
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
}
}
}
}
#endif
/* ----------------------------------------------------------------------
pack/unpack with pointers and memcpy function
no memcpy version of unpack_permute routines,
just use PACK_POINTER versions
------------------------------------------------------------------------- */
#ifdef PACK_MEMCPY
/* ----------------------------------------------------------------------
pack from data -> buf
------------------------------------------------------------------------- */
static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out;
register int mid,slow,size;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
size = nfast*sizeof(PACK_DATA);
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_plane;
upto = slow*nmid*nfast;
for (mid = 0; mid < nmid; mid++) {
in = &(buf[upto+mid*nfast]);
out = &(data[plane+mid*nstride_line]);
memcpy(in,out,size);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data
------------------------------------------------------------------------- */
static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out;
register int mid,slow,size;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
size = nfast*sizeof(PACK_DATA);
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_plane;
upto = slow*nmid*nfast;
for (mid = 0; mid < nmid; mid++) {
in = &(data[plane+mid*nstride_line]);
out = &(buf[upto+mid*nfast]);
memcpy(in,out,size);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 1 value/element
------------------------------------------------------------------------- */
static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+mid]);
end = begin + nfast*nstride_plane;
for (in = begin; in < end; in += nstride_plane)
*in = *(out++);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, 2 values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+2*mid]);
end = begin + nfast*nstride_plane;
for (in = begin; in < end; in += nstride_plane) {
*in = *(out++);
*(in+1) = *(out++);
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, one axis permutation, nqty values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*instart,*begin,*end;
register int iqty,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
nqty = plan->nqty;
out = buf;
for (slow = 0; slow < nslow; slow++) {
plane = slow*nstride_line;
for (mid = 0; mid < nmid; mid++) {
begin = &(data[plane+nqty*mid]);
end = begin + nfast*nstride_plane;
for (instart = begin; instart < end; instart += nstride_plane) {
in = instart;
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 1 value/element
------------------------------------------------------------------------- */
static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
begin = &(data[slow+mid*nstride_plane]);
end = begin + nfast*nstride_line;
for (in = begin; in < end; in += nstride_line)
*in = *(out++);
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, 2 values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*begin,*end;
register int mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
out = buf;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
begin = &(data[2*slow+mid*nstride_plane]);
end = begin + nfast*nstride_line;
for (in = begin; in < end; in += nstride_line) {
*in = *(out++);
*(in+1) = *(out++);
}
}
}
}
/* ----------------------------------------------------------------------
unpack from buf -> data, two axis permutation, nqty values/element
------------------------------------------------------------------------- */
static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
register PACK_DATA *in,*out,*instart,*begin,*end;
register int iqty,mid,slow;
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
nfast = plan->nfast;
nmid = plan->nmid;
nslow = plan->nslow;
nstride_line = plan->nstride_line;
nstride_plane = plan->nstride_plane;
nqty = plan->nqty;
out = buf;
for (slow = 0; slow < nslow; slow++) {
for (mid = 0; mid < nmid; mid++) {
begin = &(data[nqty*slow+mid*nstride_plane]);
end = begin + nfast*nstride_line;
for (instart = begin; instart < end; instart += nstride_line) {
in = instart;
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
}
}
}
}
#endif