From 31ed3f7178a349b5d47827eb8fefea90b034e7cd Mon Sep 17 00:00:00 2001 From: "Steven J. Plimpton" <sjplimp@singsing.sandia.gov> Date: Mon, 27 Aug 2018 17:09:59 -0600 Subject: [PATCH] more changes to insure each triplet IJK computed exactly once --- ...og.23Aug18.atm.g++.1 => log.27Aug18.g++.1} | 32 +++++------ ...og.23Aug18.atm.g++.4 => log.27Aug18.g++.4} | 32 +++++------ src/MANYBODY/pair_atm.cpp | 54 ++++++++----------- 3 files changed, 53 insertions(+), 65 deletions(-) rename examples/atm/{log.23Aug18.atm.g++.1 => log.27Aug18.g++.1} (69%) rename examples/atm/{log.23Aug18.atm.g++.4 => log.27Aug18.g++.4} (69%) diff --git a/examples/atm/log.23Aug18.atm.g++.1 b/examples/atm/log.27Aug18.g++.1 similarity index 69% rename from examples/atm/log.23Aug18.atm.g++.1 rename to examples/atm/log.27Aug18.g++.1 index 3373846b31..46215e108c 100644 --- a/examples/atm/log.23Aug18.atm.g++.1 +++ b/examples/atm/log.27Aug18.g++.1 @@ -26,7 +26,7 @@ Created orthogonal box = (0 0 0) to (18.3252 18.3252 18.3252) 1 by 1 by 1 MPI processor grid create_atoms 1 box Created 4000 atoms - Time spent = 0.00126314 secs + Time spent = 0.00139618 secs pair_style hybrid/overlay lj/cut 4.5 atm 4.5 2.5 pair_coeff * * lj/cut 1.0 1.0 @@ -60,26 +60,26 @@ Neighbor list info ... bin: standard Per MPI rank memory allocation (min/avg/max) = 11.47 | 11.47 | 11.47 Mbytes Step Temp E_pair E_mol TotEng Press - 0 1.033 -4.8899813 0 -3.3408686 -4.2298176 - 5 1.0337853 -4.8928208 0 -3.3425304 -4.2233154 - 10 1.0358056 -4.8953304 0 -3.3420104 -4.1897183 - 15 1.0380938 -4.8990457 0 -3.3422942 -4.1310148 - 20 1.0389566 -4.9014345 0 -3.3433892 -4.0406616 - 25 1.0358313 -4.8989663 0 -3.3456079 -3.9093019 -Loop time of 12.2062 on 1 procs for 25 steps with 4000 atoms + 0 1.033 -4.8404387 0 -3.291326 -4.1332095 + 5 1.0337247 -4.8402263 0 -3.290027 -4.1207962 + 10 1.0355935 -4.8425889 0 -3.2895869 -4.0870158 + 15 1.0376519 -4.84599 0 -3.2899013 -4.0278711 + 20 1.0382257 -4.8478854 0 -3.2909361 -3.9368052 + 25 1.0347886 -4.84473 0 -3.2929351 -3.8044469 +Loop time of 15.95 on 1 procs for 25 steps with 4000 atoms -Performance: 353.920 tau/day, 2.048 timesteps/s -99.9% CPU use with 1 MPI tasks x no OpenMP threads +Performance: 270.846 tau/day, 1.567 timesteps/s +100.0% CPU use with 1 MPI tasks x no OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 12.202 | 12.202 | 12.202 | 0.0 | 99.96 +Pair | 15.946 | 15.946 | 15.946 | 0.0 | 99.97 Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.0015621 | 0.0015621 | 0.0015621 | 0.0 | 0.01 -Output | 0.00020814 | 0.00020814 | 0.00020814 | 0.0 | 0.00 -Modify | 0.0019698 | 0.0019698 | 0.0019698 | 0.0 | 0.02 -Other | | 0.0007734 | | | 0.01 +Comm | 0.0015042 | 0.0015042 | 0.0015042 | 0.0 | 0.01 +Output | 0.00013781 | 0.00013781 | 0.00013781 | 0.0 | 0.00 +Modify | 0.0017776 | 0.0017776 | 0.0017776 | 0.0 | 0.01 +Other | | 0.0006771 | | | 0.00 Nlocal: 4000 ave 4000 max 4000 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -97,4 +97,4 @@ Dangerous builds = 0 Please see the log.cite file for references relevant to this simulation -Total wall time: 0:00:13 +Total wall time: 0:00:16 diff --git a/examples/atm/log.23Aug18.atm.g++.4 b/examples/atm/log.27Aug18.g++.4 similarity index 69% rename from examples/atm/log.23Aug18.atm.g++.4 rename to examples/atm/log.27Aug18.g++.4 index d5edfe32ba..d84f17ee2b 100644 --- a/examples/atm/log.23Aug18.atm.g++.4 +++ b/examples/atm/log.27Aug18.g++.4 @@ -26,7 +26,7 @@ Created orthogonal box = (0 0 0) to (18.3252 18.3252 18.3252) 1 by 2 by 2 MPI processor grid create_atoms 1 box Created 4000 atoms - Time spent = 0.000785112 secs + Time spent = 0.000900984 secs pair_style hybrid/overlay lj/cut 4.5 atm 4.5 2.5 pair_coeff * * lj/cut 1.0 1.0 @@ -60,26 +60,26 @@ Neighbor list info ... bin: standard Per MPI rank memory allocation (min/avg/max) = 5.532 | 5.532 | 5.532 Mbytes Step Temp E_pair E_mol TotEng Press - 0 1.033 -4.8921547 0 -3.343042 -4.2340557 - 5 1.0337949 -4.8947881 0 -3.3444835 -4.2271456 - 10 1.0358286 -4.8973178 0 -3.3439632 -4.1935779 - 15 1.0381322 -4.9010593 0 -3.3442503 -4.134913 - 20 1.0390107 -4.9034854 0 -3.3453589 -4.0446162 - 25 1.0358988 -4.9010506 0 -3.3475908 -3.9133006 -Loop time of 3.20632 on 4 procs for 25 steps with 4000 atoms + 0 1.033 -4.8404387 0 -3.291326 -4.1332095 + 5 1.0337247 -4.8402263 0 -3.290027 -4.1207962 + 10 1.0355935 -4.8425889 0 -3.2895869 -4.0870158 + 15 1.0376519 -4.84599 0 -3.2899013 -4.0278711 + 20 1.0382257 -4.8478854 0 -3.2909361 -3.9368052 + 25 1.0347886 -4.84473 0 -3.2929351 -3.8044469 +Loop time of 4.34636 on 4 procs for 25 steps with 4000 atoms -Performance: 1347.340 tau/day, 7.797 timesteps/s -100.0% CPU use with 4 MPI tasks x no OpenMP threads +Performance: 993.935 tau/day, 5.752 timesteps/s +99.6% CPU use with 4 MPI tasks x no OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 3.1207 | 3.1553 | 3.1859 | 1.5 | 98.41 +Pair | 3.9977 | 4.1036 | 4.209 | 4.9 | 94.41 Neigh | 0 | 0 | 0 | 0.0 | 0.00 -Comm | 0.019466 | 0.05009 | 0.084602 | 12.0 | 1.56 -Output | 7.1049e-05 | 8.2076e-05 | 0.00011325 | 0.0 | 0.00 -Modify | 0.00056338 | 0.00057292 | 0.00058413 | 0.0 | 0.02 -Other | | 0.0003092 | | | 0.01 +Comm | 0.13588 | 0.24134 | 0.34722 | 20.4 | 5.55 +Output | 0.00013757 | 0.00015104 | 0.00016761 | 0.0 | 0.00 +Modify | 0.00087953 | 0.00091547 | 0.00095582 | 0.0 | 0.02 +Other | | 0.0003656 | | | 0.01 Nlocal: 1000 ave 1000 max 1000 min Histogram: 4 0 0 0 0 0 0 0 0 0 @@ -97,4 +97,4 @@ Dangerous builds = 0 Please see the log.cite file for references relevant to this simulation -Total wall time: 0:00:03 +Total wall time: 0:00:04 diff --git a/src/MANYBODY/pair_atm.cpp b/src/MANYBODY/pair_atm.cpp index b63407dfe9..a6e52faeba 100644 --- a/src/MANYBODY/pair_atm.cpp +++ b/src/MANYBODY/pair_atm.cpp @@ -98,9 +98,15 @@ void PairATM::compute(int eflag, int vflag) numneigh = list->numneigh; firstneigh = list->firstneigh; - int count1 = 0; - int count2 = 0; - int count3 = 0; + // triple loop over local atoms and neighbors twice + // must compute each IJK triplet interaction exactly once + // by proc that owns the triplet atom with smallest x coord + // special logic to break ties if multiple atoms have same x or y coords + // inner two loops for jj=1,Jnum and kk=jj+1,Jnum insure + // the pair of other 2 non-minimum-x atoms is only considered once + // triplet geometry criteria for calculation: + // each pair distance <= cutoff + // produce of 3 pair distances <= cutoff_triple^3 for (ii = 0; ii < inum; ii++) { i = ilist[ii]; @@ -112,12 +118,10 @@ void PairATM::compute(int eflag, int vflag) jnum = numneigh[i]; jnumm1 = jnum - 1; - // for (jj = 0; jj < jnumm1; jj++) { - // replace with this line: - for (jj = 0; jj < jnum; jj++) { - + for (jj = 0; jj < jnumm1; jj++) { j = jlist[jj]; j &= NEIGHMASK; + rij[0] = x[j][0] - xi; if (rij[0] < 0.0) continue; rij[1] = x[j][1] - yi; @@ -125,40 +129,33 @@ void PairATM::compute(int eflag, int vflag) rij[2] = x[j][2] - zi; if (rij[0] == 0.0 and rij[1] == 0.0 and rij[2] < 0.0) continue; rij2 = rij[0]*rij[0] + rij[1]*rij[1] + rij[2]*rij[2]; - count1++; if (rij2 > cutoff_squared) continue; - count2++; - - //for (kk = jj+1; kk < jnum; kk++) { - // replace with these two lines: - for (kk = 0; kk < jnum; kk++) { - if (kk == jj) continue; + for (kk = jj+1; kk < jnum; kk++) { k = jlist[kk]; k &= NEIGHMASK; - rjk[0] = x[k][0] - x[j][0]; - if (rjk[0] < 0.0) continue; - rjk[1] = x[k][1] - x[j][1]; - if (rjk[0] == 0.0 and rjk[1] < 0.0) continue; - rjk[2] = x[k][2] - x[j][2]; - if (rjk[0] == 0.0 and rjk[1] == 0.0 and rjk[2] < 0.0) continue; - rjk2 = rjk[0]*rjk[0] + rjk[1]*rjk[1] + rjk[2]*rjk[2]; - if (rjk2 > cutoff_squared) continue; - rik[0] = x[k][0] - xi; + if (rik[0] < 0.0) continue; rik[1] = x[k][1] - yi; + if (rik[0] == 0.0 and rik[1] < 0.0) continue; rik[2] = x[k][2] - zi; + if (rik[0] == 0.0 and rik[1] == 0.0 and rik[2] < 0.0) continue; rik2 = rik[0]*rik[0] + rik[1]*rik[1] + rik[2]*rik[2]; if (rik2 > cutoff_squared) continue; + rjk[0] = x[k][0] - x[j][0]; + rjk[1] = x[k][1] - x[j][1]; + rjk[2] = x[k][2] - x[j][2]; + rjk2 = rjk[0]*rjk[0] + rjk[1]*rjk[1] + rjk[2]*rjk[2]; + if (rjk2 > cutoff_squared) continue; + double r6 = rij2*rjk2*rik2; if (r6 > cutoff_triple_sixth) continue; nu_local = nu[type[i]][type[j]][type[k]]; if (nu_local == 0.0) continue; - count3++; interaction_ddd(nu_local, r6,rij2,rik2,rjk2,rij,rik,rjk,fj,fk,eflag,evdwl); @@ -177,15 +174,6 @@ void PairATM::compute(int eflag, int vflag) } } - int count = count1; - MPI_Allreduce(&count,&count1,1,MPI_INT,MPI_SUM,world); - count = count2; - MPI_Allreduce(&count,&count2,1,MPI_INT,MPI_SUM,world); - count = count3; - MPI_Allreduce(&count,&count3,1,MPI_INT,MPI_SUM,world); - printf("FORCE %g %d %d %d\n",cutoff_squared,count1,count2,count3); - - if (vflag_fdotr) virial_fdotr_compute(); }