forked from lijiext/lammps
git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@6622 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
parent
a54f7e6b5b
commit
8745580d5d
|
@ -43,6 +43,14 @@ using namespace LAMMPS_NS;
|
|||
#define LARGE 10000.0
|
||||
#define EPS_HOC 1.0e-7
|
||||
|
||||
// ZEROF / ONEF: the constants zero and one, written as float literals when
// FFT_SINGLE is defined and as double literals otherwise
#ifdef FFT_SINGLE
|
||||
#define ZEROF 0.0f
|
||||
#define ONEF 1.0f
|
||||
#else
|
||||
#define ZEROF 0.0
|
||||
#define ONEF 1.0
|
||||
#endif
|
||||
|
||||
#define MIN(a,b) ((a) < (b) ? (a) : (b))
|
||||
#define MAX(a,b) ((a) > (b) ? (a) : (b))
|
||||
|
||||
|
@ -50,7 +58,7 @@ using namespace LAMMPS_NS;
|
|||
|
||||
PPPM::PPPM(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg)
|
||||
{
|
||||
if (narg != 1) error->all("Illegal kspace_style pppm command");
|
||||
if (narg < 1) error->all("Illegal kspace_style pppm command");
|
||||
|
||||
precision = atof(arg[0]);
|
||||
PI = 4.0*atan(1.0);
|
||||
|
@ -754,7 +762,7 @@ void PPPM::allocate()
|
|||
|
||||
// summation coeffs
|
||||
|
||||
gf_b = new double[order];
|
||||
memory->create(gf_b,order,"pppm:gf_b");
|
||||
memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
|
||||
memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
|
||||
|
||||
|
@ -778,7 +786,7 @@ void PPPM::allocate()
|
|||
remap = new Remap(lmp,world,
|
||||
nxlo_in,nxhi_in,nylo_in,nyhi_in,nzlo_in,nzhi_in,
|
||||
nxlo_fft,nxhi_fft,nylo_fft,nyhi_fft,nzlo_fft,nzhi_fft,
|
||||
1,0,0,2);
|
||||
1,0,0,FFT_PRECISION);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
|
@ -805,7 +813,7 @@ void PPPM::deallocate()
|
|||
memory->destroy(buf1);
|
||||
memory->destroy(buf2);
|
||||
|
||||
delete [] gf_b;
|
||||
memory->destroy(gf_b);
|
||||
memory->destroy2d_offset(rho1d,-order/2);
|
||||
memory->destroy2d_offset(rho_coeff,(1-order)/2);
|
||||
|
||||
|
@ -967,17 +975,24 @@ void PPPM::set_grid()
|
|||
// print info
|
||||
|
||||
if (me == 0) {
|
||||
#ifdef FFT_SINGLE
|
||||
const char fft_prec[] = "single";
|
||||
#else
|
||||
const char fft_prec[] = "double";
|
||||
#endif
|
||||
if (screen) {
|
||||
fprintf(screen," G vector = %g\n",g_ewald);
|
||||
fprintf(screen," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
|
||||
fprintf(screen," stencil order = %d\n",order);
|
||||
fprintf(screen," RMS precision = %g\n",MAX(lpr,spr));
|
||||
fprintf(screen," using %s precision FFTs\n",fft_prec);
|
||||
}
|
||||
if (logfile) {
|
||||
fprintf(logfile," G vector = %g\n",g_ewald);
|
||||
fprintf(logfile," grid = %d %d %d\n",nx_pppm,ny_pppm,nz_pppm);
|
||||
fprintf(logfile," stencil order = %d\n",order);
|
||||
fprintf(logfile," RMS precision = %g\n",MAX(lpr,spr));
|
||||
fprintf(logfile," using %s precision FFTs\n",fft_prec);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1036,7 +1051,7 @@ double PPPM::diffpr(double hx, double hy, double hz, double q2, double **acons)
|
|||
lprz = rms(hz,zprd*slab_volfactor,natoms,q2,acons);
|
||||
kspace_prec = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
|
||||
real_prec = 2.0*q2 * exp(-g_ewald*g_ewald*cutoff*cutoff) /
|
||||
sqrt(natoms*cutoff*xprd*yprd*zprd);
|
||||
sqrt(static_cast<double>(natoms)*cutoff*xprd*yprd*zprd);
|
||||
double value = kspace_prec - real_prec;
|
||||
return value;
|
||||
}
|
||||
|
@ -1113,8 +1128,8 @@ void PPPM::brick2fft()
|
|||
if (comm->procneigh[0][1] == me)
|
||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||
else {
|
||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][0],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][1],0,world);
|
||||
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
}
|
||||
|
||||
|
@ -1137,8 +1152,8 @@ void PPPM::brick2fft()
|
|||
if (comm->procneigh[0][0] == me)
|
||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||
else {
|
||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][1],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][0],0,world);
|
||||
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
}
|
||||
|
||||
|
@ -1161,8 +1176,8 @@ void PPPM::brick2fft()
|
|||
if (comm->procneigh[1][1] == me)
|
||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||
else {
|
||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][0],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][1],0,world);
|
||||
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
}
|
||||
|
||||
|
@ -1185,8 +1200,8 @@ void PPPM::brick2fft()
|
|||
if (comm->procneigh[1][0] == me)
|
||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||
else {
|
||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][1],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][0],0,world);
|
||||
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
}
|
||||
|
||||
|
@ -1209,8 +1224,8 @@ void PPPM::brick2fft()
|
|||
if (comm->procneigh[2][1] == me)
|
||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||
else {
|
||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][0],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][1],0,world);
|
||||
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
}
|
||||
|
||||
|
@ -1233,8 +1248,8 @@ void PPPM::brick2fft()
|
|||
if (comm->procneigh[2][0] == me)
|
||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||
else {
|
||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][1],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][0],0,world);
|
||||
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
}
|
||||
|
||||
|
@ -1284,8 +1299,8 @@ void PPPM::fillbrick()
|
|||
if (comm->procneigh[2][1] == me)
|
||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||
else {
|
||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][0],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][1],0,world);
|
||||
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
}
|
||||
|
||||
|
@ -1314,8 +1329,8 @@ void PPPM::fillbrick()
|
|||
if (comm->procneigh[2][0] == me)
|
||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||
else {
|
||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[2][1],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[2][0],0,world);
|
||||
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[2][1],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[2][0],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
}
|
||||
|
||||
|
@ -1344,8 +1359,8 @@ void PPPM::fillbrick()
|
|||
if (comm->procneigh[1][1] == me)
|
||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||
else {
|
||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][0],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][1],0,world);
|
||||
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
}
|
||||
|
||||
|
@ -1374,8 +1389,8 @@ void PPPM::fillbrick()
|
|||
if (comm->procneigh[1][0] == me)
|
||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||
else {
|
||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[1][1],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[1][0],0,world);
|
||||
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[1][1],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[1][0],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
}
|
||||
|
||||
|
@ -1404,8 +1419,8 @@ void PPPM::fillbrick()
|
|||
if (comm->procneigh[0][1] == me)
|
||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||
else {
|
||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][0],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][1],0,world);
|
||||
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
}
|
||||
|
||||
|
@ -1434,8 +1449,8 @@ void PPPM::fillbrick()
|
|||
if (comm->procneigh[0][0] == me)
|
||||
for (i = 0; i < n; i++) buf2[i] = buf1[i];
|
||||
else {
|
||||
MPI_Irecv(buf2,nbuf,MPI_DOUBLE,comm->procneigh[0][1],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_DOUBLE,comm->procneigh[0][0],0,world);
|
||||
MPI_Irecv(buf2,nbuf,MPI_FFT_SCALAR,comm->procneigh[0][1],0,world,&request);
|
||||
MPI_Send(buf1,n,MPI_FFT_SCALAR,comm->procneigh[0][0],0,world);
|
||||
MPI_Wait(&request,&status);
|
||||
}
|
||||
|
||||
|
@ -1497,12 +1512,12 @@ void PPPM::particle_map()
|
|||
void PPPM::make_rho()
|
||||
{
|
||||
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
||||
double dx,dy,dz,x0,y0,z0;
|
||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||
|
||||
// clear 3d density array
|
||||
|
||||
double *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
|
||||
for (i = 0; i < ngrid; i++) vec[i] = 0.0;
|
||||
FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
|
||||
for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
|
||||
|
||||
// loop over my charges, add their contribution to nearby grid points
|
||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||
|
@ -1554,7 +1569,7 @@ void PPPM::poisson(int eflag, int vflag)
|
|||
n = 0;
|
||||
for (i = 0; i < nfft; i++) {
|
||||
work1[n++] = density_fft[i];
|
||||
work1[n++] = 0.0;
|
||||
work1[n++] = ZEROF;
|
||||
}
|
||||
|
||||
fft1->compute(work1,work1,1);
|
||||
|
@ -1667,8 +1682,8 @@ void PPPM::poisson(int eflag, int vflag)
|
|||
void PPPM::fieldforce()
|
||||
{
|
||||
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
||||
double dx,dy,dz,x0,y0,z0;
|
||||
double ek[3];
|
||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||
FFT_SCALAR ekx,eky,ekz;
|
||||
|
||||
// loop over my charges, interpolate electric field from nearby grid points
|
||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||
|
@ -1679,6 +1694,7 @@ void PPPM::fieldforce()
|
|||
double *q = atom->q;
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
for (i = 0; i < nlocal; i++) {
|
||||
|
@ -1691,7 +1707,7 @@ void PPPM::fieldforce()
|
|||
|
||||
compute_rho1d(dx,dy,dz);
|
||||
|
||||
ek[0] = ek[1] = ek[2] = 0.0;
|
||||
ekx = eky = ekz = ZEROF;
|
||||
for (n = nlower; n <= nupper; n++) {
|
||||
mz = n+nz;
|
||||
z0 = rho1d[2][n];
|
||||
|
@ -1701,18 +1717,18 @@ void PPPM::fieldforce()
|
|||
for (l = nlower; l <= nupper; l++) {
|
||||
mx = l+nx;
|
||||
x0 = y0*rho1d[0][l];
|
||||
ek[0] -= x0*vdx_brick[mz][my][mx];;
|
||||
ek[1] -= x0*vdy_brick[mz][my][mx];;
|
||||
ek[2] -= x0*vdz_brick[mz][my][mx];;
|
||||
ekx -= x0*vdx_brick[mz][my][mx];
|
||||
eky -= x0*vdy_brick[mz][my][mx];
|
||||
ekz -= x0*vdz_brick[mz][my][mx];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert E-field to force
|
||||
|
||||
f[i][0] += qqrd2e*scale * q[i]*ek[0];
|
||||
f[i][1] += qqrd2e*scale * q[i]*ek[1];
|
||||
f[i][2] += qqrd2e*scale * q[i]*ek[2];
|
||||
const double qfactor = qqrd2e*scale*q[i];
|
||||
f[i][0] += qfactor*ekx;
|
||||
f[i][1] += qfactor*eky;
|
||||
f[i][2] += qfactor*ekz;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1758,15 +1774,16 @@ void PPPM::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
|
|||
charge assignment into rho1d
|
||||
dx,dy,dz = distance of particle from "lower left" grid point
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PPPM::compute_rho1d(double dx, double dy, double dz)
|
||||
void PPPM::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy,
|
||||
const FFT_SCALAR &dz)
|
||||
{
|
||||
int k,l;
|
||||
|
||||
for (k = (1-order)/2; k <= order/2; k++) {
|
||||
rho1d[0][k] = 0.0;
|
||||
rho1d[1][k] = 0.0;
|
||||
rho1d[2][k] = 0.0;
|
||||
rho1d[0][k] = ZEROF;
|
||||
rho1d[1][k] = ZEROF;
|
||||
rho1d[2][k] = ZEROF;
|
||||
|
||||
for (l = order-1; l >= 0; l--) {
|
||||
rho1d[0][k] = rho_coeff[l][k] + rho1d[0][k]*dx;
|
||||
rho1d[1][k] = rho_coeff[l][k] + rho1d[1][k]*dy;
|
||||
|
@ -1797,9 +1814,9 @@ void PPPM::compute_rho1d(double dx, double dy, double dz)
|
|||
void PPPM::compute_rho_coeff()
|
||||
{
|
||||
int j,k,l,m;
|
||||
double s;
|
||||
FFT_SCALAR s;
|
||||
|
||||
double **a;
|
||||
FFT_SCALAR **a;
|
||||
memory->create2d_offset(a,order,-order,order,"pppm:a");
|
||||
|
||||
for (k = -order; k <= order; k++)
|
||||
|
@ -1812,8 +1829,13 @@ void PPPM::compute_rho_coeff()
|
|||
s = 0.0;
|
||||
for (l = 0; l < j; l++) {
|
||||
a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
|
||||
#ifdef FFT_SINGLE
|
||||
s += powf(0.5,(float) l+1) *
|
||||
(a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
|
||||
#else
|
||||
s += pow(0.5,(double) l+1) *
|
||||
(a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
|
||||
#endif
|
||||
}
|
||||
a[0][k] = s;
|
||||
}
|
||||
|
@ -1874,7 +1896,7 @@ void PPPM::timing(int n, double &time3d, double &time1d)
|
|||
{
|
||||
double time1,time2;
|
||||
|
||||
for (int i = 0; i < 2*nfft_both; i++) work1[i] = 0.0;
|
||||
for (int i = 0; i < 2*nfft_both; i++) work1[i] = ZEROF;
|
||||
|
||||
MPI_Barrier(world);
|
||||
time1 = MPI_Wtime();
|
||||
|
@ -1914,9 +1936,10 @@ double PPPM::memory_usage()
|
|||
double bytes = nmax*3 * sizeof(double);
|
||||
int nbrick = (nxhi_out-nxlo_out+1) * (nyhi_out-nylo_out+1) *
|
||||
(nzhi_out-nzlo_out+1);
|
||||
bytes += 4 * nbrick * sizeof(double);
|
||||
bytes += 4 * nbrick * sizeof(FFT_SCALAR);
|
||||
bytes += 6 * nfft_both * sizeof(double);
|
||||
bytes += nfft_both*6 * sizeof(double);
|
||||
bytes += 2 * nbuf * sizeof(double);
|
||||
bytes += nfft_both * sizeof(double);
|
||||
bytes += nfft_both*5 * sizeof(FFT_SCALAR);
|
||||
bytes += 2 * nbuf * sizeof(FFT_SCALAR);
|
||||
return bytes;
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
@ -20,6 +20,17 @@ KSpaceStyle(pppm,PPPM)
|
|||
#ifndef LMP_PPPM_H
|
||||
#define LMP_PPPM_H
|
||||
|
||||
#include "lmptype.h"
|
||||
#include "mpi.h"
|
||||
|
||||
// FFT_SCALAR is float when FFT_SINGLE is defined and double otherwise;
// MPI_FFT_SCALAR is set to the MPI datatype of the same precision
// (MPI_FLOAT or MPI_DOUBLE) so the two always change together
#ifdef FFT_SINGLE
|
||||
typedef float FFT_SCALAR;
|
||||
#define MPI_FFT_SCALAR MPI_FLOAT
|
||||
#else
|
||||
typedef double FFT_SCALAR;
|
||||
#define MPI_FFT_SCALAR MPI_DOUBLE
|
||||
#endif
|
||||
|
||||
#include "kspace.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
@ -28,11 +39,11 @@ class PPPM : public KSpace {
|
|||
public:
|
||||
PPPM(class LAMMPS *, int, char **);
|
||||
virtual ~PPPM();
|
||||
void init();
|
||||
void setup();
|
||||
void compute(int, int);
|
||||
void timing(int, double &, double &);
|
||||
double memory_usage();
|
||||
virtual void init();
|
||||
virtual void setup();
|
||||
virtual void compute(int, int);
|
||||
virtual void timing(int, double &, double &);
|
||||
virtual double memory_usage();
|
||||
|
||||
protected:
|
||||
int me,nprocs;
|
||||
|
@ -54,17 +65,17 @@ class PPPM : public KSpace {
|
|||
int nlower,nupper;
|
||||
int ngrid,nfft,nbuf,nfft_both;
|
||||
|
||||
double ***density_brick;
|
||||
double ***vdx_brick,***vdy_brick,***vdz_brick;
|
||||
FFT_SCALAR ***density_brick;
|
||||
FFT_SCALAR ***vdx_brick,***vdy_brick,***vdz_brick;
|
||||
double *greensfn;
|
||||
double **vg;
|
||||
double *fkx,*fky,*fkz;
|
||||
double *density_fft;
|
||||
double *work1,*work2;
|
||||
double *buf1,*buf2;
|
||||
FFT_SCALAR *density_fft;
|
||||
FFT_SCALAR *work1,*work2;
|
||||
FFT_SCALAR *buf1,*buf2;
|
||||
|
||||
double *gf_b;
|
||||
double **rho1d,**rho_coeff;
|
||||
FFT_SCALAR **rho1d,**rho_coeff;
|
||||
|
||||
class FFT3d *fft1,*fft2;
|
||||
class Remap *remap;
|
||||
|
@ -80,8 +91,8 @@ class PPPM : public KSpace {
|
|||
double alpha; // geometric factor
|
||||
|
||||
void set_grid();
|
||||
void allocate();
|
||||
void deallocate();
|
||||
virtual void allocate();
|
||||
virtual void deallocate();
|
||||
int factorable(int);
|
||||
double rms(double, double, bigint, double, double **);
|
||||
double diffpr(double, double, double, double, double **);
|
||||
|
@ -89,12 +100,13 @@ class PPPM : public KSpace {
|
|||
double gf_denom(double, double, double);
|
||||
virtual void particle_map();
|
||||
virtual void make_rho();
|
||||
void brick2fft();
|
||||
void fillbrick();
|
||||
void poisson(int, int);
|
||||
virtual void brick2fft();
|
||||
virtual void fillbrick();
|
||||
virtual void poisson(int, int);
|
||||
virtual void fieldforce();
|
||||
void procs2grid2d(int,int,int,int *, int*);
|
||||
void compute_rho1d(double, double, double);
|
||||
void compute_rho1d(const FFT_SCALAR &, const FFT_SCALAR &,
|
||||
const FFT_SCALAR &);
|
||||
void compute_rho_coeff();
|
||||
void slabcorr(int);
|
||||
};
|
||||
|
|
|
@ -26,6 +26,14 @@ using namespace LAMMPS_NS;
|
|||
|
||||
#define OFFSET 16384
|
||||
|
||||
// ZEROF / ONEF: the constants zero and one, written as float literals when
// FFT_SINGLE is defined and as double literals otherwise
#ifdef FFT_SINGLE
|
||||
#define ZEROF 0.0f
|
||||
#define ONEF 1.0f
|
||||
#else
|
||||
#define ZEROF 0.0
|
||||
#define ONEF 1.0
|
||||
#endif
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
PPPMTIP4P::PPPMTIP4P(LAMMPS *lmp, int narg, char **arg) :
|
||||
|
@ -87,13 +95,13 @@ void PPPMTIP4P::particle_map()
|
|||
void PPPMTIP4P::make_rho()
|
||||
{
|
||||
int i,l,m,n,nx,ny,nz,mx,my,mz,iH1,iH2;
|
||||
double dx,dy,dz,x0,y0,z0;
|
||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||
double *xi,xM[3];
|
||||
|
||||
// clear 3d density array
|
||||
|
||||
double *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
|
||||
for (i = 0; i < ngrid; i++) vec[i] = 0.0;
|
||||
FFT_SCALAR *vec = &density_brick[nzlo_out][nylo_out][nxlo_out];
|
||||
for (i = 0; i < ngrid; i++) vec[i] = ZEROF;
|
||||
|
||||
// loop over my charges, add their contribution to nearby grid points
|
||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||
|
@ -143,13 +151,13 @@ void PPPMTIP4P::make_rho()
|
|||
void PPPMTIP4P::fieldforce()
|
||||
{
|
||||
int i,l,m,n,nx,ny,nz,mx,my,mz;
|
||||
double dx,dy,dz,x0,y0,z0;
|
||||
double ek[3];
|
||||
FFT_SCALAR dx,dy,dz,x0,y0,z0;
|
||||
FFT_SCALAR ekx,eky,ekz;
|
||||
double *xi;
|
||||
int iH1,iH2;
|
||||
double xM[3];
|
||||
double fx,fy,fz;
|
||||
double ddotf, rOM[3], f1[3];
|
||||
double ddotf, rOMx, rOMy, rOMz, f1x, f1y, f1z;
|
||||
|
||||
// loop over my charges, interpolate electric field from nearby grid points
|
||||
// (nx,ny,nz) = global coords of grid pt to "lower left" of charge
|
||||
|
@ -160,6 +168,7 @@ void PPPMTIP4P::fieldforce()
|
|||
double *q = atom->q;
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
|
||||
|
@ -178,7 +187,7 @@ void PPPMTIP4P::fieldforce()
|
|||
|
||||
compute_rho1d(dx,dy,dz);
|
||||
|
||||
ek[0] = ek[1] = ek[2] = 0.0;
|
||||
ekx = eky = ekz = ZEROF;
|
||||
for (n = nlower; n <= nupper; n++) {
|
||||
mz = n+nz;
|
||||
z0 = rho1d[2][n];
|
||||
|
@ -188,47 +197,47 @@ void PPPMTIP4P::fieldforce()
|
|||
for (l = nlower; l <= nupper; l++) {
|
||||
mx = l+nx;
|
||||
x0 = y0*rho1d[0][l];
|
||||
ek[0] -= x0*vdx_brick[mz][my][mx];
|
||||
ek[1] -= x0*vdy_brick[mz][my][mx];
|
||||
ek[2] -= x0*vdz_brick[mz][my][mx];
|
||||
ekx -= x0*vdx_brick[mz][my][mx];
|
||||
eky -= x0*vdy_brick[mz][my][mx];
|
||||
ekz -= x0*vdz_brick[mz][my][mx];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert E-field to force
|
||||
|
||||
const double qfactor = qqrd2e*scale*q[i];
|
||||
if (type[i] != typeO) {
|
||||
f[i][0] += qqrd2e*q[i]*ek[0];
|
||||
f[i][1] += qqrd2e*q[i]*ek[1];
|
||||
f[i][2] += qqrd2e*q[i]*ek[2];
|
||||
f[i][0] += qfactor*ekx;
|
||||
f[i][1] += qfactor*eky;
|
||||
f[i][2] += qfactor*ekz;
|
||||
} else {
|
||||
|
||||
fx = qqrd2e * q[i] * ek[0];
|
||||
fy = qqrd2e * q[i] * ek[1];
|
||||
fz = qqrd2e * q[i] * ek[2];
|
||||
fx = qfactor * ekx;
|
||||
fy = qfactor * eky;
|
||||
fz = qfactor * ekz;
|
||||
find_M(i,iH1,iH2,xM);
|
||||
|
||||
rOM[0] = xM[0] - x[i][0];
|
||||
rOM[1] = xM[1] - x[i][1];
|
||||
rOM[2] = xM[2] - x[i][2];
|
||||
rOMx = xM[0] - x[i][0];
|
||||
rOMy = xM[1] - x[i][1];
|
||||
rOMz = xM[2] - x[i][2];
|
||||
|
||||
ddotf = (rOM[0] * fx + rOM[1] * fy + rOM[2] * fz) / (qdist * qdist);
|
||||
ddotf = (rOMx * fx + rOMy * fy + rOMz * fz) / (qdist * qdist);
|
||||
|
||||
f1[0] = ddotf * rOM[0];
|
||||
f1[1] = ddotf * rOM[1];
|
||||
f1[2] = ddotf * rOM[2];
|
||||
f1x = ddotf * rOMx;
|
||||
f1y = ddotf * rOMy;
|
||||
f1z = ddotf * rOMz;
|
||||
|
||||
f[i][0] += fx - alpha * (fx - f1[0]);
|
||||
f[i][1] += fy - alpha * (fy - f1[1]);
|
||||
f[i][2] += fz - alpha * (fz - f1[2]);
|
||||
f[i][0] += fx - alpha * (fx - f1x);
|
||||
f[i][1] += fy - alpha * (fy - f1y);
|
||||
f[i][2] += fz - alpha * (fz - f1z);
|
||||
|
||||
f[iH1][0] += 0.5*alpha*(fx - f1[0]);
|
||||
f[iH1][1] += 0.5*alpha*(fy - f1[1]);
|
||||
f[iH1][2] += 0.5*alpha*(fz - f1[2]);
|
||||
f[iH1][0] += 0.5*alpha*(fx - f1x);
|
||||
f[iH1][1] += 0.5*alpha*(fy - f1y);
|
||||
f[iH1][2] += 0.5*alpha*(fz - f1z);
|
||||
|
||||
f[iH2][0] += 0.5*alpha*(fx - f1[0]);
|
||||
f[iH2][1] += 0.5*alpha*(fy - f1[1]);
|
||||
f[iH2][2] += 0.5*alpha*(fz - f1[2]);
|
||||
f[iH2][0] += 0.5*alpha*(fx - f1x);
|
||||
f[iH2][1] += 0.5*alpha*(fy - f1y);
|
||||
f[iH2][2] += 0.5*alpha*(fz - f1z);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,12 +27,14 @@ namespace LAMMPS_NS {
|
|||
class PPPMTIP4P : public PPPM {
|
||||
public:
|
||||
PPPMTIP4P(class LAMMPS *, int, char **);
|
||||
virtual ~PPPMTIP4P () {};
|
||||
|
||||
protected:
|
||||
virtual void particle_map();
|
||||
virtual void make_rho();
|
||||
virtual void fieldforce();
|
||||
|
||||
private:
|
||||
void particle_map();
|
||||
void make_rho();
|
||||
void fieldforce();
|
||||
|
||||
void find_M(int, int &, int &, double *);
|
||||
};
|
||||
|
||||
|
|
|
@ -11,10 +11,12 @@
|
|||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "mpi.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "remap.h"
|
||||
|
||||
#define PACK_DATA FFT_SCALAR
|
||||
|
||||
#include "pack.h"
|
||||
|
||||
// smaller of A and B; the outer parentheses keep the conditional expression
// intact when MIN() appears inside a larger expression (e.g. 2 * MIN(a,b))
// NOTE: A and B are each evaluated more than once, so pass side-effect-free arguments
#define MIN(A,B) (((A) < (B)) ? (A) : (B))
|
||||
|
@ -57,13 +59,13 @@
|
|||
plan plan returned by previous call to remap_3d_create_plan
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void remap_3d(double *in, double *out, double *buf,
|
||||
void remap_3d(FFT_SCALAR *in, FFT_SCALAR *out, FFT_SCALAR *buf,
|
||||
struct remap_plan_3d *plan)
|
||||
|
||||
{
|
||||
MPI_Status status;
|
||||
int i,isend,irecv;
|
||||
double *scratch;
|
||||
FFT_SCALAR *scratch;
|
||||
|
||||
if (plan->memory == 0)
|
||||
scratch = buf;
|
||||
|
@ -74,7 +76,7 @@ void remap_3d(double *in, double *out, double *buf,
|
|||
|
||||
for (irecv = 0; irecv < plan->nrecv; irecv++)
|
||||
MPI_Irecv(&scratch[plan->recv_bufloc[irecv]],plan->recv_size[irecv],
|
||||
MPI_DOUBLE,plan->recv_proc[irecv],0,
|
||||
MPI_FFT_SCALAR,plan->recv_proc[irecv],0,
|
||||
plan->comm,&plan->request[irecv]);
|
||||
|
||||
// send all messages to other procs
|
||||
|
@ -82,7 +84,7 @@ void remap_3d(double *in, double *out, double *buf,
|
|||
for (isend = 0; isend < plan->nsend; isend++) {
|
||||
plan->pack(&in[plan->send_offset[isend]],
|
||||
plan->sendbuf,&plan->packplan[isend]);
|
||||
MPI_Send(plan->sendbuf,plan->send_size[isend],MPI_DOUBLE,
|
||||
MPI_Send(plan->sendbuf,plan->send_size[isend],MPI_FFT_SCALAR,
|
||||
plan->send_proc[isend],0,plan->comm);
|
||||
}
|
||||
|
||||
|
@ -150,13 +152,6 @@ struct remap_plan_3d *remap_3d_create_plan(
|
|||
MPI_Comm_rank(comm,&me);
|
||||
MPI_Comm_size(comm,&nprocs);
|
||||
|
||||
// single precision not yet supported
|
||||
|
||||
if (precision == 1) {
|
||||
if (me == 0) printf("Single precision not supported\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// allocate memory for plan data struct
|
||||
|
||||
plan = (struct remap_plan_3d *) malloc(sizeof(struct remap_plan_3d));
|
||||
|
@ -209,10 +204,7 @@ struct remap_plan_3d *remap_3d_create_plan(
|
|||
// malloc space for send info
|
||||
|
||||
if (nsend) {
|
||||
if (precision == 1)
|
||||
plan->pack = NULL;
|
||||
else
|
||||
plan->pack = pack_3d;
|
||||
plan->pack = pack_3d;
|
||||
|
||||
plan->send_offset = (int *) malloc(nsend*sizeof(int));
|
||||
plan->send_size = (int *) malloc(nsend*sizeof(int));
|
||||
|
@ -272,45 +264,23 @@ struct remap_plan_3d *remap_3d_create_plan(
|
|||
// malloc space for recv info
|
||||
|
||||
if (nrecv) {
|
||||
if (precision == 1) {
|
||||
if (permute == 0)
|
||||
plan->unpack = NULL;
|
||||
else if (permute == 1) {
|
||||
if (nqty == 1)
|
||||
plan->unpack = NULL;
|
||||
else if (nqty == 2)
|
||||
plan->unpack = NULL;
|
||||
else
|
||||
plan->unpack = NULL;
|
||||
}
|
||||
else if (permute == 2) {
|
||||
if (nqty == 1)
|
||||
plan->unpack = NULL;
|
||||
else if (nqty == 2)
|
||||
plan->unpack = NULL;
|
||||
else
|
||||
plan->unpack = NULL;
|
||||
}
|
||||
if (permute == 0)
|
||||
plan->unpack = unpack_3d;
|
||||
else if (permute == 1) {
|
||||
if (nqty == 1)
|
||||
plan->unpack = unpack_3d_permute1_1;
|
||||
else if (nqty == 2)
|
||||
plan->unpack = unpack_3d_permute1_2;
|
||||
else
|
||||
plan->unpack = unpack_3d_permute1_n;
|
||||
}
|
||||
else if (precision == 2) {
|
||||
if (permute == 0)
|
||||
plan->unpack = unpack_3d;
|
||||
else if (permute == 1) {
|
||||
if (nqty == 1)
|
||||
plan->unpack = unpack_3d_permute1_1;
|
||||
else if (nqty == 2)
|
||||
plan->unpack = unpack_3d_permute1_2;
|
||||
else
|
||||
plan->unpack = unpack_3d_permute1_n;
|
||||
}
|
||||
else if (permute == 2) {
|
||||
if (nqty == 1)
|
||||
plan->unpack = unpack_3d_permute2_1;
|
||||
else if (nqty == 2)
|
||||
plan->unpack = unpack_3d_permute2_2;
|
||||
else
|
||||
plan->unpack = unpack_3d_permute2_n;
|
||||
}
|
||||
else if (permute == 2) {
|
||||
if (nqty == 1)
|
||||
plan->unpack = unpack_3d_permute2_1;
|
||||
else if (nqty == 2)
|
||||
plan->unpack = unpack_3d_permute2_2;
|
||||
else
|
||||
plan->unpack = unpack_3d_permute2_n;
|
||||
}
|
||||
|
||||
plan->recv_offset = (int *) malloc(nrecv*sizeof(int));
|
||||
|
@ -408,10 +378,7 @@ struct remap_plan_3d *remap_3d_create_plan(
|
|||
size = MAX(size,plan->send_size[nsend]);
|
||||
|
||||
if (size) {
|
||||
if (precision == 1)
|
||||
plan->sendbuf = NULL;
|
||||
else
|
||||
plan->sendbuf = (double *) malloc(size*sizeof(double));
|
||||
plan->sendbuf = (FFT_SCALAR *) malloc(size*sizeof(FFT_SCALAR));
|
||||
if (plan->sendbuf == NULL) return NULL;
|
||||
}
|
||||
|
||||
|
@ -422,11 +389,8 @@ struct remap_plan_3d *remap_3d_create_plan(
|
|||
|
||||
if (memory == 1) {
|
||||
if (nrecv > 0) {
|
||||
if (precision == 1)
|
||||
plan->scratch = NULL;
|
||||
else
|
||||
plan->scratch =
|
||||
(double *) malloc(nqty*out.isize*out.jsize*out.ksize*sizeof(double));
|
||||
plan->scratch =
|
||||
(FFT_SCALAR *) malloc(nqty*out.isize*out.jsize*out.ksize*sizeof(FFT_SCALAR));
|
||||
if (plan->scratch == NULL) return NULL;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
@ -11,14 +11,24 @@
|
|||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <mpi.h>
|
||||
|
||||
// FFT_SCALAR is float when FFT_SINGLE is defined and double otherwise;
// MPI_FFT_SCALAR is set to the MPI datatype of the same precision
// (MPI_FLOAT or MPI_DOUBLE) so the two always change together
#ifdef FFT_SINGLE
|
||||
typedef float FFT_SCALAR;
|
||||
#define MPI_FFT_SCALAR MPI_FLOAT
|
||||
#else
|
||||
typedef double FFT_SCALAR;
|
||||
#define MPI_FFT_SCALAR MPI_DOUBLE
|
||||
#endif
|
||||
|
||||
// details of how to do a 3d remap
|
||||
|
||||
struct remap_plan_3d {
|
||||
double *sendbuf; // buffer for MPI sends
|
||||
double *scratch; // scratch buffer for MPI recvs
|
||||
void (*pack)(double *, double *, struct pack_plan_3d *);
|
||||
FFT_SCALAR *sendbuf; // buffer for MPI sends
|
||||
FFT_SCALAR *scratch; // scratch buffer for MPI recvs
|
||||
void (*pack)(FFT_SCALAR *, FFT_SCALAR *, struct pack_plan_3d *);
|
||||
// which pack function to use
|
||||
void (*unpack)(double *, double *, struct pack_plan_3d *);
|
||||
void (*unpack)(FFT_SCALAR *, FFT_SCALAR *, struct pack_plan_3d *);
|
||||
// which unpack function to use
|
||||
int *send_offset; // extraction loc for each send
|
||||
int *send_size; // size of each send message
|
||||
|
@ -47,7 +57,7 @@ struct extent_3d {
|
|||
|
||||
// function prototypes
|
||||
|
||||
void remap_3d(double *, double *, double *, struct remap_plan_3d *);
|
||||
void remap_3d(FFT_SCALAR *, FFT_SCALAR *, FFT_SCALAR *, struct remap_plan_3d *);
|
||||
struct remap_plan_3d *remap_3d_create_plan(MPI_Comm,
|
||||
int, int, int, int, int, int, int, int, int, int, int, int,
|
||||
int, int, int, int);
|
||||
|
|
|
@ -42,7 +42,7 @@ Remap::~Remap()
|
|||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void Remap::perform(double *in, double *out, double *buf)
|
||||
// thin wrapper: passes in, out and buf unchanged to remap_3d() together
// with the plan stored in this object
void Remap::perform(FFT_SCALAR *in, FFT_SCALAR *out, FFT_SCALAR *buf)
|
||||
{
|
||||
remap_3d(in,out,buf,plan);
|
||||
}
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
@ -24,7 +24,7 @@ class Remap : protected Pointers {
|
|||
Remap(class LAMMPS *, MPI_Comm,int,int,int,int,int,int,
|
||||
int,int,int,int,int,int,int,int,int,int);
|
||||
~Remap();
|
||||
void perform(double *, double *, double *);
|
||||
void perform(FFT_SCALAR *, FFT_SCALAR *, FFT_SCALAR *);
|
||||
|
||||
private:
|
||||
struct remap_plan_3d *plan;
|
||||
|
|
757
src/pack.cpp
757
src/pack.cpp
|
@ -1,757 +0,0 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "pack.h"
|
||||
|
||||
#if !defined(PACK_POINTER) && !defined(PACK_MEMCPY)
|
||||
#define PACK_ARRAY
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Pack and unpack functions:
|
||||
|
||||
pack routines copy strided values from data into contiguous locs in buf
|
||||
unpack routines copy contiguous values from buf into strided locs in data
|
||||
different versions of unpack depending on permutation
|
||||
and # of values/element
|
||||
PACK_ARRAY routines work via array indices (default)
|
||||
PACK_POINTER routines work via pointers
|
||||
PACK_MEMCPY routines work via pointers and memcpy function
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack/unpack with array indices
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PACK_ARRAY
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack from data -> buf
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void pack_3d(double *data, double *buf, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register int in,out,fast,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
in = 0;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_plane;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
out = plane + mid*nstride_line;
|
||||
for (fast = 0; fast < nfast; fast++)
|
||||
buf[in++] = data[out++];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register int in,out,fast,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = 0;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_plane;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
in = plane + mid*nstride_line;
|
||||
for (fast = 0; fast < nfast; fast++)
|
||||
data[in++] = buf[out++];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, 1 value/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register int in,out,fast,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = 0;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
in = plane + mid;
|
||||
for (fast = 0; fast < nfast; fast++, in += nstride_plane)
|
||||
data[in] = buf[out++];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, 2 values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register int in,out,fast,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = 0;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
in = plane + 2*mid;
|
||||
for (fast = 0; fast < nfast; fast++, in += nstride_plane) {
|
||||
data[in] = buf[out++];
|
||||
data[in+1] = buf[out++];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, nqty values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register int in,out,iqty,instart,fast,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
nqty = plan->nqty;
|
||||
|
||||
out = 0;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
instart = plane + nqty*mid;
|
||||
for (fast = 0; fast < nfast; fast++, instart += nstride_plane) {
|
||||
in = instart;
|
||||
for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, 1 value/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register int in,out,fast,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = 0;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
in = slow + mid*nstride_plane;
|
||||
for (fast = 0; fast < nfast; fast++, in += nstride_line)
|
||||
data[in] = buf[out++];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, 2 values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register int in,out,fast,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = 0;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
in = 2*slow + mid*nstride_plane;
|
||||
for (fast = 0; fast < nfast; fast++, in += nstride_line) {
|
||||
data[in] = buf[out++];
|
||||
data[in+1] = buf[out++];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, nqty values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register int in,out,iqty,instart,fast,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
nqty = plan->nqty;
|
||||
|
||||
out = 0;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
instart = nqty*slow + mid*nstride_plane;
|
||||
for (fast = 0; fast < nfast; fast++, instart += nstride_line) {
|
||||
in = instart;
|
||||
for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack/unpack with pointers
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PACK_POINTER
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack from data -> buf
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void pack_3d(double *data, double *buf, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
in = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_plane;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+mid*nstride_line]);
|
||||
end = begin + nfast;
|
||||
for (out = begin; out < end; out++)
|
||||
*(in++) = *out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_plane;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+mid*nstride_line]);
|
||||
end = begin + nfast;
|
||||
for (in = begin; in < end; in++)
|
||||
*in = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, 1 value/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+mid]);
|
||||
end = begin + nfast*nstride_plane;
|
||||
for (in = begin; in < end; in += nstride_plane)
|
||||
*in = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, 2 values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+2*mid]);
|
||||
end = begin + nfast*nstride_plane;
|
||||
for (in = begin; in < end; in += nstride_plane) {
|
||||
*in = *(out++);
|
||||
*(in+1) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, nqty values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*instart,*begin,*end;
|
||||
register int iqty,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
nqty = plan->nqty;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+nqty*mid]);
|
||||
end = begin + nfast*nstride_plane;
|
||||
for (instart = begin; instart < end; instart += nstride_plane) {
|
||||
in = instart;
|
||||
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, 1 value/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[slow+mid*nstride_plane]);
|
||||
end = begin + nfast*nstride_line;
|
||||
for (in = begin; in < end; in += nstride_line)
|
||||
*in = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, 2 values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[2*slow+mid*nstride_plane]);
|
||||
end = begin + nfast*nstride_line;
|
||||
for (in = begin; in < end; in += nstride_line) {
|
||||
*in = *(out++);
|
||||
*(in+1) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, nqty values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*instart,*begin,*end;
|
||||
register int iqty,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
nqty = plan->nqty;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[nqty*slow+mid*nstride_plane]);
|
||||
end = begin + nfast*nstride_line;
|
||||
for (instart = begin; instart < end; instart += nstride_line) {
|
||||
in = instart;
|
||||
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack/unpack with pointers and memcpy function
|
||||
no memcpy version of unpack_permute routines,
|
||||
just use PACK_POINTER versions
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PACK_MEMCPY
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack from data -> buf
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void pack_3d(double *data, double *buf, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out;
|
||||
register int mid,slow,size;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
size = nfast*sizeof(double);
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_plane;
|
||||
upto = slow*nmid*nfast;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
in = &(buf[upto+mid*nfast]);
|
||||
out = &(data[plane+mid*nstride_line]);
|
||||
memcpy(in,out,size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out;
|
||||
register int mid,slow,size;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
size = nfast*sizeof(double);
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_plane;
|
||||
upto = slow*nmid*nfast;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
in = &(data[plane+mid*nstride_line]);
|
||||
out = &(buf[upto+mid*nfast]);
|
||||
memcpy(in,out,size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, 1 value/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute1_1(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+mid]);
|
||||
end = begin + nfast*nstride_plane;
|
||||
for (in = begin; in < end; in += nstride_plane)
|
||||
*in = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, 2 values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute1_2(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+2*mid]);
|
||||
end = begin + nfast*nstride_plane;
|
||||
for (in = begin; in < end; in += nstride_plane) {
|
||||
*in = *(out++);
|
||||
*(in+1) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, nqty values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute1_n(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*instart,*begin,*end;
|
||||
register int iqty,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
nqty = plan->nqty;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+nqty*mid]);
|
||||
end = begin + nfast*nstride_plane;
|
||||
for (instart = begin; instart < end; instart += nstride_plane) {
|
||||
in = instart;
|
||||
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, 1 value/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute2_1(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[slow+mid*nstride_plane]);
|
||||
end = begin + nfast*nstride_line;
|
||||
for (in = begin; in < end; in += nstride_line)
|
||||
*in = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, 2 values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute2_2(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[2*slow+mid*nstride_plane]);
|
||||
end = begin + nfast*nstride_line;
|
||||
for (in = begin; in < end; in += nstride_line) {
|
||||
*in = *(out++);
|
||||
*(in+1) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, nqty values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void unpack_3d_permute2_n(double *buf, double *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register double *in,*out,*instart,*begin,*end;
|
||||
register int iqty,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
nqty = plan->nqty;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[nqty*slow+mid*nstride_plane]);
|
||||
end = begin + nfast*nstride_line;
|
||||
for (instart = begin; instart < end; instart += nstride_line) {
|
||||
in = instart;
|
||||
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
753
src/pack.h
753
src/pack.h
|
@ -1,4 +1,4 @@
|
|||
/* ----------------------------------------------------------------------
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
@ -22,13 +22,746 @@ struct pack_plan_3d {
|
|||
int nqty; // # of values/element
|
||||
};
|
||||
|
||||
// function prototypes
|
||||
|
||||
void pack_3d(double *, double *, struct pack_plan_3d *);
|
||||
void unpack_3d(double *, double *, struct pack_plan_3d *);
|
||||
void unpack_3d_permute1_1(double *, double *, struct pack_plan_3d *);
|
||||
void unpack_3d_permute1_2(double *, double *, struct pack_plan_3d *);
|
||||
void unpack_3d_permute1_n(double *, double *, struct pack_plan_3d *);
|
||||
void unpack_3d_permute2_1(double *, double *, struct pack_plan_3d *);
|
||||
void unpack_3d_permute2_2(double *, double *, struct pack_plan_3d *);
|
||||
void unpack_3d_permute2_n(double *, double *, struct pack_plan_3d *);
|
||||
#if !defined(PACK_POINTER) && !defined(PACK_MEMCPY)
|
||||
#define PACK_ARRAY
|
||||
#endif
|
||||
|
||||
#ifndef PACK_DATA
|
||||
#define PACK_DATA double
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Pack and unpack functions:
|
||||
|
||||
pack routines copy strided values from data into contiguous locs in buf
|
||||
unpack routines copy contiguous values from buf into strided locs in data
|
||||
different versions of unpack depending on permutation
|
||||
and # of values/element
|
||||
PACK_ARRAY routines work via array indices (default)
|
||||
PACK_POINTER routines work via pointers
|
||||
PACK_MEMCPY routines work via pointers and memcpy function
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack/unpack with array indices
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PACK_ARRAY
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack from data -> buf
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan)
{
  // gather nslow*nmid lines of nfast values each from data into
  // consecutive slots of buf; line (slow,mid) starts at
  // data[slow*nstride_plane + mid*nstride_line]

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                               // next slot to fill in buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;     // offset of this plane in data
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + mid*nstride_line;   // first value of this line
      for (int fast = 0; fast < nfast; fast++)
        buf[ibuf++] = data[idata++];
    }
  }
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  // scatter consecutive values of buf into nslow*nmid lines of nfast
  // values each in data; line (slow,mid) starts at
  // data[slow*nstride_plane + mid*nstride_line]

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                               // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_plane;     // offset of this plane in data
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + mid*nstride_line;   // first slot of this line
      for (int fast = 0; fast < nfast; fast++)
        data[idata++] = buf[ibuf++];
    }
  }
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, 1 value/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  // buf is read sequentially; value (slow,mid,fast) is stored at
  // data[slow*nstride_line + mid + fast*nstride_plane]

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                               // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;      // slow index advances by a line
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + mid;
      for (int fast = 0; fast < nfast; fast++, idata += nstride_plane)
        data[idata] = buf[ibuf++];
    }
  }
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, 2 values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
{
  // buf is read sequentially, two values per element; element
  // (slow,mid,fast) is stored at
  // data[slow*nstride_line + 2*mid + fast*nstride_plane] and the next slot

  const int nfast = plan->nfast;
  const int nmid = plan->nmid;
  const int nslow = plan->nslow;
  const int nstride_line = plan->nstride_line;
  const int nstride_plane = plan->nstride_plane;

  int ibuf = 0;                               // next value to read from buf
  for (int slow = 0; slow < nslow; slow++) {
    const int plane = slow*nstride_line;      // slow index advances by a line
    for (int mid = 0; mid < nmid; mid++) {
      int idata = plane + 2*mid;
      for (int fast = 0; fast < nfast; fast++, idata += nstride_plane) {
        data[idata] = buf[ibuf++];
        data[idata+1] = buf[ibuf++];
      }
    }
  }
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, nqty values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register int in,out,iqty,instart,fast,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
nqty = plan->nqty;
|
||||
|
||||
out = 0;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
instart = plane + nqty*mid;
|
||||
for (fast = 0; fast < nfast; fast++, instart += nstride_plane) {
|
||||
in = instart;
|
||||
for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, 1 value/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register int in,out,fast,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = 0;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
in = slow + mid*nstride_plane;
|
||||
for (fast = 0; fast < nfast; fast++, in += nstride_line)
|
||||
data[in] = buf[out++];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, 2 values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register int in,out,fast,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = 0;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
in = 2*slow + mid*nstride_plane;
|
||||
for (fast = 0; fast < nfast; fast++, in += nstride_line) {
|
||||
data[in] = buf[out++];
|
||||
data[in+1] = buf[out++];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, nqty values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register int in,out,iqty,instart,fast,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
nqty = plan->nqty;
|
||||
|
||||
out = 0;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
instart = nqty*slow + mid*nstride_plane;
|
||||
for (fast = 0; fast < nfast; fast++, instart += nstride_line) {
|
||||
in = instart;
|
||||
for (iqty = 0; iqty < nqty; iqty++) data[in++] = buf[out++];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack/unpack with pointers
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PACK_POINTER
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack from data -> buf
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
in = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_plane;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+mid*nstride_line]);
|
||||
end = begin + nfast;
|
||||
for (out = begin; out < end; out++)
|
||||
*(in++) = *out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_plane;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+mid*nstride_line]);
|
||||
end = begin + nfast;
|
||||
for (in = begin; in < end; in++)
|
||||
*in = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, 1 value/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+mid]);
|
||||
end = begin + nfast*nstride_plane;
|
||||
for (in = begin; in < end; in += nstride_plane)
|
||||
*in = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, 2 values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+2*mid]);
|
||||
end = begin + nfast*nstride_plane;
|
||||
for (in = begin; in < end; in += nstride_plane) {
|
||||
*in = *(out++);
|
||||
*(in+1) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, nqty values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*instart,*begin,*end;
|
||||
register int iqty,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
nqty = plan->nqty;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+nqty*mid]);
|
||||
end = begin + nfast*nstride_plane;
|
||||
for (instart = begin; instart < end; instart += nstride_plane) {
|
||||
in = instart;
|
||||
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, 1 value/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[slow+mid*nstride_plane]);
|
||||
end = begin + nfast*nstride_line;
|
||||
for (in = begin; in < end; in += nstride_line)
|
||||
*in = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, 2 values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[2*slow+mid*nstride_plane]);
|
||||
end = begin + nfast*nstride_line;
|
||||
for (in = begin; in < end; in += nstride_line) {
|
||||
*in = *(out++);
|
||||
*(in+1) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, nqty values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*instart,*begin,*end;
|
||||
register int iqty,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
nqty = plan->nqty;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[nqty*slow+mid*nstride_plane]);
|
||||
end = begin + nfast*nstride_line;
|
||||
for (instart = begin; instart < end; instart += nstride_line) {
|
||||
in = instart;
|
||||
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack/unpack with pointers and memcpy function
|
||||
no memcpy version of unpack_permute routines,
|
||||
just use PACK_POINTER versions
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PACK_MEMCPY
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack from data -> buf
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void pack_3d(PACK_DATA *data, PACK_DATA *buf, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out;
|
||||
register int mid,slow,size;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
size = nfast*sizeof(PACK_DATA);
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_plane;
|
||||
upto = slow*nmid*nfast;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
in = &(buf[upto+mid*nfast]);
|
||||
out = &(data[plane+mid*nstride_line]);
|
||||
memcpy(in,out,size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out;
|
||||
register int mid,slow,size;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,upto;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
size = nfast*sizeof(PACK_DATA);
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_plane;
|
||||
upto = slow*nmid*nfast;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
in = &(data[plane+mid*nstride_line]);
|
||||
out = &(buf[upto+mid*nfast]);
|
||||
memcpy(in,out,size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, 1 value/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute1_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+mid]);
|
||||
end = begin + nfast*nstride_plane;
|
||||
for (in = begin; in < end; in += nstride_plane)
|
||||
*in = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, 2 values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute1_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+2*mid]);
|
||||
end = begin + nfast*nstride_plane;
|
||||
for (in = begin; in < end; in += nstride_plane) {
|
||||
*in = *(out++);
|
||||
*(in+1) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, one axis permutation, nqty values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute1_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*instart,*begin,*end;
|
||||
register int iqty,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,plane,nqty;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
nqty = plan->nqty;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
plane = slow*nstride_line;
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[plane+nqty*mid]);
|
||||
end = begin + nfast*nstride_plane;
|
||||
for (instart = begin; instart < end; instart += nstride_plane) {
|
||||
in = instart;
|
||||
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, 1 value/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute2_1(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[slow+mid*nstride_plane]);
|
||||
end = begin + nfast*nstride_line;
|
||||
for (in = begin; in < end; in += nstride_line)
|
||||
*in = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, 2 values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute2_2(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*begin,*end;
|
||||
register int mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[2*slow+mid*nstride_plane]);
|
||||
end = begin + nfast*nstride_line;
|
||||
for (in = begin; in < end; in += nstride_line) {
|
||||
*in = *(out++);
|
||||
*(in+1) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
unpack from buf -> data, two axis permutation, nqty values/element
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_plan_3d *plan)
|
||||
|
||||
{
|
||||
register PACK_DATA *in,*out,*instart,*begin,*end;
|
||||
register int iqty,mid,slow;
|
||||
register int nfast,nmid,nslow,nstride_line,nstride_plane,nqty;
|
||||
|
||||
nfast = plan->nfast;
|
||||
nmid = plan->nmid;
|
||||
nslow = plan->nslow;
|
||||
nstride_line = plan->nstride_line;
|
||||
nstride_plane = plan->nstride_plane;
|
||||
nqty = plan->nqty;
|
||||
|
||||
out = buf;
|
||||
for (slow = 0; slow < nslow; slow++) {
|
||||
for (mid = 0; mid < nmid; mid++) {
|
||||
begin = &(data[nqty*slow+mid*nstride_plane]);
|
||||
end = begin + nfast*nstride_line;
|
||||
for (instart = begin; instart < end; instart += nstride_line) {
|
||||
in = instart;
|
||||
for (iqty = 0; iqty < nqty; iqty++) *(in++) = *(out++);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue