forked from lijiext/lammps
more changes to insure each triplet IJK computed exactly once
This commit is contained in:
parent
c01fb5f701
commit
31ed3f7178
|
@ -26,7 +26,7 @@ Created orthogonal box = (0 0 0) to (18.3252 18.3252 18.3252)
|
|||
1 by 1 by 1 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 4000 atoms
|
||||
Time spent = 0.00126314 secs
|
||||
Time spent = 0.00139618 secs
|
||||
|
||||
pair_style hybrid/overlay lj/cut 4.5 atm 4.5 2.5
|
||||
pair_coeff * * lj/cut 1.0 1.0
|
||||
|
@ -60,26 +60,26 @@ Neighbor list info ...
|
|||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 11.47 | 11.47 | 11.47 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 1.033 -4.8899813 0 -3.3408686 -4.2298176
|
||||
5 1.0337853 -4.8928208 0 -3.3425304 -4.2233154
|
||||
10 1.0358056 -4.8953304 0 -3.3420104 -4.1897183
|
||||
15 1.0380938 -4.8990457 0 -3.3422942 -4.1310148
|
||||
20 1.0389566 -4.9014345 0 -3.3433892 -4.0406616
|
||||
25 1.0358313 -4.8989663 0 -3.3456079 -3.9093019
|
||||
Loop time of 12.2062 on 1 procs for 25 steps with 4000 atoms
|
||||
0 1.033 -4.8404387 0 -3.291326 -4.1332095
|
||||
5 1.0337247 -4.8402263 0 -3.290027 -4.1207962
|
||||
10 1.0355935 -4.8425889 0 -3.2895869 -4.0870158
|
||||
15 1.0376519 -4.84599 0 -3.2899013 -4.0278711
|
||||
20 1.0382257 -4.8478854 0 -3.2909361 -3.9368052
|
||||
25 1.0347886 -4.84473 0 -3.2929351 -3.8044469
|
||||
Loop time of 15.95 on 1 procs for 25 steps with 4000 atoms
|
||||
|
||||
Performance: 353.920 tau/day, 2.048 timesteps/s
|
||||
99.9% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
Performance: 270.846 tau/day, 1.567 timesteps/s
|
||||
100.0% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 12.202 | 12.202 | 12.202 | 0.0 | 99.96
|
||||
Pair | 15.946 | 15.946 | 15.946 | 0.0 | 99.97
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.0015621 | 0.0015621 | 0.0015621 | 0.0 | 0.01
|
||||
Output | 0.00020814 | 0.00020814 | 0.00020814 | 0.0 | 0.00
|
||||
Modify | 0.0019698 | 0.0019698 | 0.0019698 | 0.0 | 0.02
|
||||
Other | | 0.0007734 | | | 0.01
|
||||
Comm | 0.0015042 | 0.0015042 | 0.0015042 | 0.0 | 0.01
|
||||
Output | 0.00013781 | 0.00013781 | 0.00013781 | 0.0 | 0.00
|
||||
Modify | 0.0017776 | 0.0017776 | 0.0017776 | 0.0 | 0.01
|
||||
Other | | 0.0006771 | | | 0.00
|
||||
|
||||
Nlocal: 4000 ave 4000 max 4000 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
@ -97,4 +97,4 @@ Dangerous builds = 0
|
|||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:13
|
||||
Total wall time: 0:00:16
|
|
@ -26,7 +26,7 @@ Created orthogonal box = (0 0 0) to (18.3252 18.3252 18.3252)
|
|||
1 by 2 by 2 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 4000 atoms
|
||||
Time spent = 0.000785112 secs
|
||||
Time spent = 0.000900984 secs
|
||||
|
||||
pair_style hybrid/overlay lj/cut 4.5 atm 4.5 2.5
|
||||
pair_coeff * * lj/cut 1.0 1.0
|
||||
|
@ -60,26 +60,26 @@ Neighbor list info ...
|
|||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 5.532 | 5.532 | 5.532 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 1.033 -4.8921547 0 -3.343042 -4.2340557
|
||||
5 1.0337949 -4.8947881 0 -3.3444835 -4.2271456
|
||||
10 1.0358286 -4.8973178 0 -3.3439632 -4.1935779
|
||||
15 1.0381322 -4.9010593 0 -3.3442503 -4.134913
|
||||
20 1.0390107 -4.9034854 0 -3.3453589 -4.0446162
|
||||
25 1.0358988 -4.9010506 0 -3.3475908 -3.9133006
|
||||
Loop time of 3.20632 on 4 procs for 25 steps with 4000 atoms
|
||||
0 1.033 -4.8404387 0 -3.291326 -4.1332095
|
||||
5 1.0337247 -4.8402263 0 -3.290027 -4.1207962
|
||||
10 1.0355935 -4.8425889 0 -3.2895869 -4.0870158
|
||||
15 1.0376519 -4.84599 0 -3.2899013 -4.0278711
|
||||
20 1.0382257 -4.8478854 0 -3.2909361 -3.9368052
|
||||
25 1.0347886 -4.84473 0 -3.2929351 -3.8044469
|
||||
Loop time of 4.34636 on 4 procs for 25 steps with 4000 atoms
|
||||
|
||||
Performance: 1347.340 tau/day, 7.797 timesteps/s
|
||||
100.0% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
Performance: 993.935 tau/day, 5.752 timesteps/s
|
||||
99.6% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 3.1207 | 3.1553 | 3.1859 | 1.5 | 98.41
|
||||
Pair | 3.9977 | 4.1036 | 4.209 | 4.9 | 94.41
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.019466 | 0.05009 | 0.084602 | 12.0 | 1.56
|
||||
Output | 7.1049e-05 | 8.2076e-05 | 0.00011325 | 0.0 | 0.00
|
||||
Modify | 0.00056338 | 0.00057292 | 0.00058413 | 0.0 | 0.02
|
||||
Other | | 0.0003092 | | | 0.01
|
||||
Comm | 0.13588 | 0.24134 | 0.34722 | 20.4 | 5.55
|
||||
Output | 0.00013757 | 0.00015104 | 0.00016761 | 0.0 | 0.00
|
||||
Modify | 0.00087953 | 0.00091547 | 0.00095582 | 0.0 | 0.02
|
||||
Other | | 0.0003656 | | | 0.01
|
||||
|
||||
Nlocal: 1000 ave 1000 max 1000 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
|
@ -97,4 +97,4 @@ Dangerous builds = 0
|
|||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:03
|
||||
Total wall time: 0:00:04
|
|
@ -98,9 +98,15 @@ void PairATM::compute(int eflag, int vflag)
|
|||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
int count1 = 0;
|
||||
int count2 = 0;
|
||||
int count3 = 0;
|
||||
// triple loop over local atoms and neighbors twice
|
||||
// must compute each IJK triplet interaction exactly once
|
||||
// by proc that owns the triplet atom with smallest x coord
|
||||
// special logic to break ties if multiple atoms have same x or y coords
|
||||
// inner two loops for jj=1,Jnum and kk=jj+1,Jnum insure
|
||||
// the pair of other 2 non-minimum-x atoms is only considered once
|
||||
// triplet geometry criteria for calculation:
|
||||
// each pair distance <= cutoff
|
||||
// produce of 3 pair distances <= cutoff_triple^3
|
||||
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
|
@ -112,12 +118,10 @@ void PairATM::compute(int eflag, int vflag)
|
|||
jnum = numneigh[i];
|
||||
jnumm1 = jnum - 1;
|
||||
|
||||
// for (jj = 0; jj < jnumm1; jj++) {
|
||||
// replace with this line:
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
|
||||
for (jj = 0; jj < jnumm1; jj++) {
|
||||
j = jlist[jj];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
rij[0] = x[j][0] - xi;
|
||||
if (rij[0] < 0.0) continue;
|
||||
rij[1] = x[j][1] - yi;
|
||||
|
@ -125,40 +129,33 @@ void PairATM::compute(int eflag, int vflag)
|
|||
rij[2] = x[j][2] - zi;
|
||||
if (rij[0] == 0.0 and rij[1] == 0.0 and rij[2] < 0.0) continue;
|
||||
rij2 = rij[0]*rij[0] + rij[1]*rij[1] + rij[2]*rij[2];
|
||||
count1++;
|
||||
if (rij2 > cutoff_squared) continue;
|
||||
count2++;
|
||||
|
||||
//for (kk = jj+1; kk < jnum; kk++) {
|
||||
// replace with these two lines:
|
||||
for (kk = 0; kk < jnum; kk++) {
|
||||
if (kk == jj) continue;
|
||||
|
||||
for (kk = jj+1; kk < jnum; kk++) {
|
||||
k = jlist[kk];
|
||||
k &= NEIGHMASK;
|
||||
|
||||
rjk[0] = x[k][0] - x[j][0];
|
||||
if (rjk[0] < 0.0) continue;
|
||||
rjk[1] = x[k][1] - x[j][1];
|
||||
if (rjk[0] == 0.0 and rjk[1] < 0.0) continue;
|
||||
rjk[2] = x[k][2] - x[j][2];
|
||||
if (rjk[0] == 0.0 and rjk[1] == 0.0 and rjk[2] < 0.0) continue;
|
||||
rjk2 = rjk[0]*rjk[0] + rjk[1]*rjk[1] + rjk[2]*rjk[2];
|
||||
if (rjk2 > cutoff_squared) continue;
|
||||
|
||||
rik[0] = x[k][0] - xi;
|
||||
if (rik[0] < 0.0) continue;
|
||||
rik[1] = x[k][1] - yi;
|
||||
if (rik[0] == 0.0 and rik[1] < 0.0) continue;
|
||||
rik[2] = x[k][2] - zi;
|
||||
if (rik[0] == 0.0 and rik[1] == 0.0 and rik[2] < 0.0) continue;
|
||||
rik2 = rik[0]*rik[0] + rik[1]*rik[1] + rik[2]*rik[2];
|
||||
if (rik2 > cutoff_squared) continue;
|
||||
|
||||
rjk[0] = x[k][0] - x[j][0];
|
||||
rjk[1] = x[k][1] - x[j][1];
|
||||
rjk[2] = x[k][2] - x[j][2];
|
||||
rjk2 = rjk[0]*rjk[0] + rjk[1]*rjk[1] + rjk[2]*rjk[2];
|
||||
if (rjk2 > cutoff_squared) continue;
|
||||
|
||||
double r6 = rij2*rjk2*rik2;
|
||||
if (r6 > cutoff_triple_sixth) continue;
|
||||
|
||||
nu_local = nu[type[i]][type[j]][type[k]];
|
||||
if (nu_local == 0.0) continue;
|
||||
|
||||
count3++;
|
||||
interaction_ddd(nu_local,
|
||||
r6,rij2,rik2,rjk2,rij,rik,rjk,fj,fk,eflag,evdwl);
|
||||
|
||||
|
@ -177,15 +174,6 @@ void PairATM::compute(int eflag, int vflag)
|
|||
}
|
||||
}
|
||||
|
||||
int count = count1;
|
||||
MPI_Allreduce(&count,&count1,1,MPI_INT,MPI_SUM,world);
|
||||
count = count2;
|
||||
MPI_Allreduce(&count,&count2,1,MPI_INT,MPI_SUM,world);
|
||||
count = count3;
|
||||
MPI_Allreduce(&count,&count3,1,MPI_INT,MPI_SUM,world);
|
||||
printf("FORCE %g %d %d %d\n",cutoff_squared,count1,count2,count3);
|
||||
|
||||
|
||||
if (vflag_fdotr) virial_fdotr_compute();
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue