draft the parallel_* constructs based on lambdas

LAMMPS_LAMBDA was added to kokkos_type.h to facilitate this. some member variables will likely need local copies in the fix_momentum code.
2016-12-19 12:02:12 -07:00 · 2016-12-19 12:02:12 -07:00 · a1b441a71f
parent f6f2170369
commit a1b441a71f
2 changed files with 67 additions and 69 deletions
--- a/src/KOKKOS/fix_momentum_kokkos.cpp
+++ b/src/KOKKOS/fix_momentum_kokkos.cpp
@ -52,6 +52,37 @@ void FixMomentumKokkos<DeviceType>::init()

 /* ---------------------------------------------------------------------- */

+template<class DeviceType>
+double FixMomentumKokkos<DeviceType>::get_kinetic_energy(
+    typename AT::t_v_array_randomread v,
+    typename AT::t_int_1d_randomread mask)
+{
+  double ke=0.0;
+  // D.I. : does this rmass check make sense in Kokkos mode ?
+  if (atom->rmass) {
+    atomKK->sync(execution_space, RMASS_MASK);
+    typename AT::t_float_1d_randomread rmass = atomKK->k_rmass;
+    Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(int i, double& update) {
+      if (mask(i) & groupbit)
+        update += rmass(i) *
+          (v(i,0)*v(i,0) + v(i,1)*v(i,1) + v(i,2)*v(i,2));
+    }, ke);
+  } else {
+    // D.I. : why is there no MASS_MASK ?
+    atomKK->sync(execution_space, TYPE_MASK);
+    typename AT::t_int_1d_randomread type = atomKK->k_type;
+    typename AT::t_float_1d_randomread mass = atomKK->k_mass;
+    Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(int i, double& update) {
+      if (mask(i) & groupbit)
+        update += mass(type(i)) *
+          (v(i,0)*v(i,0) + v(i,1)*v(i,1) + v(i,2)*v(i,2));
+    }, ke);
+  }
+  double ke_total;
+  MPI_Allreduce(&ke,&ke_total,1,MPI_DOUBLE,MPI_SUM,world);
+  return ke_total;
+}
+
 template<class DeviceType>
 void FixMomentumKokkos<DeviceType>::end_of_step()
 {
@ -73,31 +104,7 @@ void FixMomentumKokkos<DeviceType>::end_of_step()

  // compute kinetic energy before momentum removal, if needed

-  if (rescale) {
-    
-    atomKK->sync(execution_space, TYPE_MASK);
-
-    typename AT::t_int_1d_randomread type = atomKK->k_type;
-    double ke=0.0;
-
-    // D.I. : does this rmass check make sense in Kokkos mode ?
-    if (atom->rmass) {
-      atomKK->sync(execution_space, RMASS_MASK);
-      typename AT::t_float_1d_randomread rmass = atomKK->k_rmass;
-//    for (int i = 0; i < nlocal; i++)
-//      if (mask[i] & groupbit)
-//        ke += rmass[i] *
-//          (v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2]);
-    } else {
-      // D.I. : why is there no MASS_MASK ?
-      typename AT::t_float_1d_randomread mass = atomKK->k_mass;
-//    for (int i = 0; i < nlocal; i++)
-//      if (mask[i] & groupbit)
-//        ke +=  mass[type[i]] *
-//          (v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2]);
-    }
-    MPI_Allreduce(&ke,&ekin_old,1,MPI_DOUBLE,MPI_SUM,world);
-  }
+  if (rescale) ekin_old = get_kinetic_energy(v, mask);

  if (linear) {
    double vcm[3];
@ -106,12 +113,13 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
    // adjust velocities by vcm to zero linear momentum
    // only adjust a component if flag is set

-//  for (int i = 0; i < nlocal; i++)
-//    if (mask[i] & groupbit) {
-//      if (xflag) v[i][0] -= vcm[0];
-//      if (yflag) v[i][1] -= vcm[1];
-//      if (zflag) v[i][2] -= vcm[2];
-//    }
+    Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(int i) {
+      if (mask(i) & groupbit) {
+        if (xflag) v(i,0) -= vcm(0);
+        if (yflag) v(i,1) -= vcm(1);
+        if (zflag) v(i,2) -= vcm(2);
+      }
+    });
  }

  if (angular) {
@ -130,52 +138,36 @@ void FixMomentumKokkos<DeviceType>::end_of_step()
    typename AT::t_imageint_1d_randomread image = atomKK->k_image.view<DeviceType>();
    int nlocal = atom->nlocal;

-//  for (int i = 0; i < nlocal; i++)
-//    if (mask[i] & groupbit) {
-//      double dx,dy,dz;
-//      double unwrap[3];
-//      domain->unmap(x[i],image[i],unwrap);
-//      dx = unwrap[0] - xcm[0];
-//      dy = unwrap[1] - xcm[1];
-//      dz = unwrap[2] - xcm[2];
-//      v[i][0] -= omega[1]*dz - omega[2]*dy;
-//      v[i][1] -= omega[2]*dx - omega[0]*dz;
-//      v[i][2] -= omega[0]*dy - omega[1]*dx;
-//    }
+    Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(int i) {
+      if (mask[i] & groupbit) {
+        double dx,dy,dz;
+        double unwrap[3];
+        domain->unmap(x[i],image[i],unwrap);
+        dx = unwrap[0] - xcm[0];
+        dy = unwrap[1] - xcm[1];
+        dz = unwrap[2] - xcm[2];
+        v(i,0) -= omega[1]*dz - omega[2]*dy;
+        v(i,1) -= omega[2]*dx - omega[0]*dz;
+        v(i,2) -= omega[0]*dy - omega[1]*dx;
+      }
+    });
  }

  // compute kinetic energy after momentum removal, if needed

  if (rescale) {

-    double ke=0.0, factor=1.0;
-    typename AT::t_int_1d_randomread type = atomKK->k_type;
-
-    if (atom->rmass) {
-      atomKK->sync(execution_space, RMASS_MASK);
-      typename AT::t_float_1d_randomread rmass = atomKK->k_rmass;
-//    for (int i = 0; i < nlocal; i++)
-//      if (mask[i] & groupbit)
-//        ke += rmass[i] *
-//          (v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2]);
-    } else {
-      // D.I. : why is there no MASS_MASK ?
-      typename AT::t_float_1d_randomread mass = atomKK->k_mass;
-//    for (int i = 0; i < nlocal; i++)
-//      if (mask[i] & groupbit)
-//        ke +=  mass[type[i]] *
-//          (v[i][0]*v[i][0] + v[i][1]*v[i][1] + v[i][2]*v[i][2]);
-    }
-    MPI_Allreduce(&ke,&ekin_new,1,MPI_DOUBLE,MPI_SUM,world);
+    ekin_new = get_kinetic_energy(v, mask);

+    double factor = 1.0;
    if (ekin_new != 0.0) factor = sqrt(ekin_old/ekin_new);
-//  for (int i = 0; i < nlocal; i++) {
-//    if (mask[i] & groupbit) {
-//      v[i][0] *= factor;
-//      v[i][1] *= factor;
-//      v[i][2] *= factor;
-//    }
-//  }
+    Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(int i) {
+      if (mask(i) & groupbit) {
+        v(i,0) *= factor;
+        v(i,1) *= factor;
+        v(i,2) *= factor;
+      }
+    });
  }
 }

--- a/src/KOKKOS/kokkos_type.h
+++ b/src/KOKKOS/kokkos_type.h
@ -920,4 +920,10 @@ void memset_kokkos (ViewType &view) {
 #define ISFINITE(x) std::isfinite(x)
 #endif

+#ifdef KOKKOS_HAVE_CUDA
+#define LAMMPS_LAMBDA [=] __device__
+#else
+#define LAMMPS_LAMBDA [=]
+#endif
+
 #endif