git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@5906 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2011-04-07 16:07:43 +00:00
parent f3feb53326
commit 3480e40e5f
10 changed files with 105 additions and 130 deletions

View File

@ -75,6 +75,10 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }
__kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global numtyp4* lj3, const int lj_types,
__global numtyp *sp_lj_in, __global int *dev_nbor,
@ -114,12 +118,9 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
for ( ; nbor<list_end; nbor+=nbor_pitch) {
int j=*nbor;
if (j < nall)
factor_lj = (numtyp)1.0;
else {
factor_lj = sp_lj[j/nall];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int jtype=jx.w;
@ -227,12 +228,9 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
for ( ; nbor<list_end; nbor+=nbor_pitch) {
int j=*nbor;
if (j < nall)
factor_lj = (numtyp)1.0;
else {
factor_lj = sp_lj[j/nall];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int mtype=itype+jx.w;

View File

@ -93,6 +93,10 @@ __inline float fetch_q(const int& i, const float *q)
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }
__kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global numtyp4* lj3, const int lj_types,
__global numtyp *sp_lj_in, __global int *dev_nbor,
@ -139,14 +143,10 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
int j=*nbor;
numtyp factor_lj, factor_coul;
if (j < nall) {
factor_lj = (numtyp)1.0;
factor_coul = (numtyp)0.0;
} else {
factor_lj = sp_lj[j/nall];
factor_coul = (numtyp)1.0-sp_lj[j/nall+4];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
factor_coul = (numtyp)1.0-sp_lj[sbmask(j)+4];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int jtype=jx.w;
@ -282,14 +282,10 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
int j=*nbor;
numtyp factor_lj, factor_coul;
if (j < nall) {
factor_lj = (numtyp)1.0;
factor_coul = (numtyp)0.0;
} else {
factor_lj = sp_lj[j/nall];
factor_coul = (numtyp)1.0-sp_lj[j/nall+4];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
factor_coul = (numtyp)1.0-sp_lj[sbmask(j)+4];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int mtype=itype+jx.w;

View File

@ -93,6 +93,10 @@ __inline float fetch_q(const int& i, const float *q)
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }
__kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
const int lj_types,
__global numtyp *sp_lj_in, __global int *dev_nbor,
@ -142,14 +146,10 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
int j=*nbor;
numtyp factor_lj, factor_coul;
if (j < nall) {
factor_lj = (numtyp)1.0;
factor_coul = (numtyp)0.0;
} else {
factor_lj = sp_lj[j/nall];
factor_coul = (numtyp)1.0-sp_lj[j/nall+4];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
factor_coul = (numtyp)1.0-sp_lj[sbmask(j)+4];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int jtype=jx.w;
@ -287,14 +287,10 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp2 *ljd_in,
int j=*nbor;
numtyp factor_lj, factor_coul;
if (j < nall) {
factor_lj = (numtyp)1.0;
factor_coul = (numtyp)0.0;
} else {
factor_lj = sp_lj[j/nall];
factor_coul = (numtyp)1.0-sp_lj[j/nall+4];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
factor_coul = (numtyp)1.0-sp_lj[sbmask(j)+4];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int jtype=jx.w;
@ -390,4 +386,3 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp2 *ljd_in,
}
#endif

View File

@ -22,6 +22,10 @@
#include "gb_gpu_extra.h"
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }
__inline void compute_eta_torque(numtyp m[9],numtyp m2[9], const numtyp4 shape,
numtyp ans[9])
{
@ -142,12 +146,9 @@ __kernel void kernel_gayberne(__global numtyp4* x_,__global numtyp4 *q,
for ( ; nbor<nbor_end; nbor+=stride) {
int j=*nbor;
if (j < nall)
factor_lj = (numtyp)1.0;
else {
factor_lj = sp_lj[j/nall];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
j &= NEIGHMASK;
numtyp4 jx=x_[j];
int jtype=jx.w;

View File

@ -22,6 +22,10 @@
#include "gb_gpu_extra.h"
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }
__kernel void kernel_sphere_gb(__global numtyp4 *x_,__global numtyp4 *q,
__global numtyp4* shape,__global numtyp4* well,
__global numtyp *gum, __global numtyp2* sig_eps,
@ -68,12 +72,9 @@ __kernel void kernel_sphere_gb(__global numtyp4 *x_,__global numtyp4 *q,
for ( ; nbor<nbor_end; nbor+=stride) {
int j=*nbor;
if (j < nall)
factor_lj = (numtyp)1.0;
else {
factor_lj = sp_lj[j/nall];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
j &= NEIGHMASK;
numtyp4 jx=x_[j];
int jtype=jx.w;
@ -299,12 +300,9 @@ __kernel void kernel_lj(__global numtyp4 *x_, __global numtyp4 *lj1,
for ( ; nbor<list_end; nbor+=stride) {
int j=*nbor;
if (j < nall)
factor_lj = (numtyp)1.0;
else {
factor_lj = sp_lj[j/nall];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
j &= NEIGHMASK;
numtyp4 jx=x_[j];
int jtype=jx.w;
@ -409,12 +407,9 @@ __kernel void kernel_lj_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
for ( ; nbor<list_end; nbor+=stride) {
int j=*nbor;
if (j < nall)
factor_lj = (numtyp)1.0;
else {
factor_lj = sp_lj[j/nall];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
j &= NEIGHMASK;
numtyp4 jx=x_[j];
int mtype=itype+jx.w;

View File

@ -75,6 +75,10 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }
__kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global numtyp4* lj3, const int lj_types,
__global numtyp *sp_lj_in, __global int *dev_nbor,
@ -114,12 +118,9 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
for ( ; nbor<list_end; nbor+=nbor_pitch) {
int j=*nbor;
if (j < nall)
factor_lj = (numtyp)1.0;
else {
factor_lj = sp_lj[j/nall];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int jtype=jx.w;
@ -220,12 +221,9 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
for ( ; nbor<list_end; nbor+=nbor_pitch) {
int j=*nbor;
if (j < nall)
factor_lj = (numtyp)1.0;
else {
factor_lj = sp_lj[j/nall];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int mtype=itype+jx.w;

View File

@ -75,6 +75,10 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }
__kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global numtyp4* lj3, const int lj_types,
__global numtyp *sp_lj_in, __global int *dev_nbor,
@ -114,12 +118,9 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
for ( ; nbor<list_end; nbor+=nbor_pitch) {
int j=*nbor;
if (j < nall)
factor_lj = (numtyp)1.0;
else {
factor_lj = sp_lj[j/nall];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int jtype=jx.w;
@ -219,12 +220,9 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
for ( ; nbor<list_end; nbor+=nbor_pitch) {
int j=*nbor;
if (j < nall)
factor_lj = (numtyp)1.0;
else {
factor_lj = sp_lj[j/nall];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int mtype=itype+jx.w;

View File

@ -85,6 +85,10 @@ __inline float fetch_q(const int& i, const float *q)
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }
__kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global numtyp4* lj3, const int lj_types,
__global numtyp *sp_lj_in, __global int *dev_nbor,
@ -131,14 +135,10 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
int j=*nbor;
numtyp factor_lj, factor_coul;
if (j < nall) {
factor_lj = (numtyp)1.0;
factor_coul = (numtyp)1.0;
} else {
factor_lj = sp_lj[j/nall];
factor_coul = sp_lj[j/nall+4];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
factor_coul = sp_lj[sbmask(j)+4];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int jtype=jx.w;
@ -260,14 +260,10 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
int j=*nbor;
numtyp factor_lj, factor_coul;
if (j < nall) {
factor_lj = (numtyp)1.0;
factor_coul = (numtyp)1.0;
} else {
factor_lj = sp_lj[j/nall];
factor_coul = sp_lj[j/nall+4];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
factor_coul = sp_lj[sbmask(j)+4];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int mtype=itype+jx.w;

View File

@ -93,6 +93,10 @@ __inline float fetch_q(const int& i, const float *q)
#endif
#define SBBITS 30
#define NEIGHMASK 0x3FFFFFFF
__inline int sbmask(int j) { return j >> SBBITS & 3; }
__kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
__global numtyp4* lj3, const int lj_types,
__global numtyp *sp_lj_in, __global int *dev_nbor,
@ -139,14 +143,10 @@ __kernel void kernel_pair(__global numtyp4 *x_, __global numtyp4 *lj1,
int j=*nbor;
numtyp factor_lj, factor_coul;
if (j < nall) {
factor_lj = (numtyp)1.0;
factor_coul = (numtyp)0.0;
} else {
factor_lj = sp_lj[j/nall];
factor_coul = (numtyp)1.0-sp_lj[j/nall+4];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
factor_coul = (numtyp)1.0-sp_lj[sbmask(j)+4];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int jtype=jx.w;
@ -274,14 +274,10 @@ __kernel void kernel_pair_fast(__global numtyp4 *x_, __global numtyp4 *lj1_in,
int j=*nbor;
numtyp factor_lj, factor_coul;
if (j < nall) {
factor_lj = (numtyp)1.0;
factor_coul = (numtyp)0.0;
} else {
factor_lj = sp_lj[j/nall];
factor_coul = (numtyp)1.0-sp_lj[j/nall+4];
j %= nall;
}
factor_lj = sp_lj[sbmask(j)];
factor_coul = (numtyp)1.0-sp_lj[sbmask(j)+4];
j &= NEIGHMASK;
numtyp4 jx=fetch_pos(j,x_); //x_[j];
int mtype=itype+jx.w;

View File

@ -57,6 +57,8 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)
#define CELL_BLOCK_SIZE 64
#define BLOCK_2D 8
#define SBBITS 30
__kernel void transpose(int *out, int *in, int columns_in, int rows_in)
{
__local float block[BLOCK_2D][BLOCK_2D+1];
@ -279,16 +281,16 @@ __kernel void kernel_special(__global int *dev_nbor,
int offset=ii;
for (int i=0; i<n3; i++) {
if (special[offset]==jtag) {
nbor+=nall;
int which = 1;
if (i>=n1)
nbor+=nall;
which++;
if (i>=n2)
nbor+=nall;
which++;
nbor=nbor ^ (which << SBBITS);
*list=nbor;
}
offset+=nt;
}
if (nbor>=nall)
*list=nbor;
}
} // if ii
}