git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15226 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2016-06-28 13:29:16 +00:00 · 2016-06-28 13:29:16 +00:00 · 788ba55436
parent cec2f2518f
commit 788ba55436
12 changed files with 103 additions and 17 deletions
--- a/src/USER-INTEL/Install.sh
+++ b/src/USER-INTEL/Install.sh
@ -42,6 +42,8 @@ action intel_buffers.cpp
 action math_extra_intel.h
 action intel_simd.h pair_sw_intel.cpp
 action intel_intrinsics.h pair_tersoff_intel.cpp
+action verlet_lrt_intel.h pppm.cpp
+action verlet_lrt_intel.cpp pppm.cpp

 # step 2: handle cases and tasks not handled in step 1.

--- a/src/USER-INTEL/README
+++ b/src/USER-INTEL/README
@ -26,16 +26,18 @@ This package is based on the USER-OMP package and provides LAMMPS styles that:
 When using the suffix command with "intel", intel styles will be used if they
 exist. If the suffix command is used with "hybrid intel omp" and the USER-OMP 
 USER-OMP styles will be used whenever USER-INTEL styles are not available. This
-allow for running most styles in LAMMPS with threading. For example, in the
-latter case with the USER-OMP package installed,
+allows for running most styles in LAMMPS with threading.

-      kspace_style     pppm 1e-4
+-----------------------------------------------------------------------------

-is equivalent to:
-
-      kspace_style     pppm/omp   1e-4
-
-because no pppm style has been implemented for the Intel package.
+The Long-Range Thread mode (LRT) in the Intel package currently uses
+pthreads by default. If pthreads are not supported in the build environment,
+the compile flag "-DLMP_INTEL_NOLRT" will disable the feature to allow for 
+builds without pthreads. Alternatively, "-DLMP_INTEL_LRT11" can be used to
+build with compilers that support threads using the C++11 standard. When using
+LRT mode, you might need to disable OpenMP affinity settings (e.g.
+export KMP_AFFINITY=none). LAMMPS will generate a warning if the settings
+need to be changed.

 -----------------------------------------------------------------------------

--- a/src/USER-INTEL/fix_intel.cpp
+++ b/src/USER-INTEL/fix_intel.cpp
@ -94,6 +94,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg)
  int nomp = 0, no_affinity = 0;
  _allow_separate_buffers = 1;
  _offload_ghost = -1;
+  _lrt = 0;

  int iarg = 4;
  while (iarg < narg) {
@ -132,6 +133,12 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg)
    } else if (strcmp(arg[iarg],"no_affinity") == 0) {
      no_affinity = 1;
      iarg++;
+    } else if (strcmp(arg[iarg], "lrt") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command");
+      if (strcmp(arg[iarg+1],"yes") == 0) _lrt = 1;
+      else if (strcmp(arg[iarg+1],"no") == 0) _lrt = 0;
+      else error->all(FLERR,"Illegal package intel command");
+      iarg += 2;
    }

    // undocumented options
@ -152,6 +159,13 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg)
    _offload_balance = 0.0;
  }

+  // if using LRT mode, create the integrate style
+  if (_lrt) {
+    char *str;
+    str = (char *) "verlet/lrt/intel";
+    update->create_integrate(1,&str,0);
+  }
+
  // error check

  if (_offload_balance > 1.0 || _offload_threads < 0 ||
--- a/src/USER-INTEL/fix_intel.h
+++ b/src/USER-INTEL/fix_intel.h
@ -74,6 +74,10 @@ class FixIntel : public Fix {
    return 0;
  }
  inline void set_reduce_flag() { _need_reduce = 1; }
+  inline int lrt() {  // LRT flag as seen by callers: _lrt only when kspace_match("pppm/intel", 0) is non-null
+    if (force->kspace_match("pppm/intel", 0)) return _lrt;
+    else return 0;  // no matching kspace style: report LRT as off regardless of _lrt
+  }

 protected:
  IntelBuffers<float,float> *_single_buffers;
@ -152,7 +156,7 @@ class FixIntel : public Fix {
 protected:
  int _overflow_flag[5];
  _alignvar(int _off_overflow_flag[5],64);
-  int _allow_separate_buffers, _offload_ghost;
+  int _allow_separate_buffers, _offload_ghost, _lrt;

  IntelBuffers<float,float>::vec3_acc_t *_force_array_s;
  IntelBuffers<float,double>::vec3_acc_t *_force_array_m;
--- a/src/USER-INTEL/pair_buck_coul_long_intel.cpp
+++ b/src/USER-INTEL/pair_buck_coul_long_intel.cpp
@ -83,7 +83,7 @@ void PairBuckCoulLongIntel::compute(int eflag, int vflag,
  const int offload_end = fix->offload_end_pair();
  const int ago = neighbor->ago;

-  if (ago != 0 && fix->separate_buffers() == 0) {
+  if (_lrt == 0 && ago != 0 && fix->separate_buffers() == 0) {
    fix->start_watch(TIME_PACK);
    #if defined(_OPENMP)
    #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
@ -476,6 +476,8 @@ void PairBuckCoulLongIntel::init_style()
    pack_force_const(force_const_double, fix->get_double_buffers());
  else
    pack_force_const(force_const_single, fix->get_single_buffers());
+
+  _lrt = fix->lrt();
 }

 template <class flt_t, class acc_t>
--- a/src/USER-INTEL/pair_buck_coul_long_intel.h
+++ b/src/USER-INTEL/pair_buck_coul_long_intel.h
@ -40,7 +40,7 @@ class PairBuckCoulLongIntel : public PairBuckCoulLong {

 private:
  FixIntel *fix;
-  int _cop;
+  int _cop, _lrt;

  template <class flt_t> class ForceConst;

--- a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp
+++ b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp
@ -80,7 +80,7 @@ void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag,
  const int offload_end = fix->offload_end_pair();
  const int ago = neighbor->ago;

-  if (ago != 0 && fix->separate_buffers() == 0) {
+  if (_lrt == 0 && ago != 0 && fix->separate_buffers() == 0) {
    fix->start_watch(TIME_PACK);
    #if defined(_OPENMP)
    #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
@ -504,6 +504,8 @@ void PairLJCharmmCoulLongIntel::init_style()
    pack_force_const(force_const_double, fix->get_double_buffers());
  else
    pack_force_const(force_const_single, fix->get_single_buffers());
+
+  _lrt = fix->lrt();
 }

 template <class flt_t, class acc_t>
--- a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.h
+++ b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.h
@ -42,7 +42,7 @@ class PairLJCharmmCoulLongIntel : public PairLJCharmmCoulLong {

 private:
  FixIntel *fix;
-  int _cop;
+  int _cop, _lrt;

  template <class flt_t> class ForceConst;
  template <class flt_t, class acc_t>
--- a/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp
+++ b/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp
@ -81,7 +81,7 @@ void PairLJCutCoulLongIntel::compute(int eflag, int vflag,
  const int offload_end = fix->offload_end_pair();
  const int ago = neighbor->ago;

-  if (ago != 0 && fix->separate_buffers() == 0) {
+  if (_lrt == 0 && ago != 0 && fix->separate_buffers() == 0) {
    fix->start_watch(TIME_PACK);
    #if defined(_OPENMP)
    #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
@ -472,6 +472,8 @@ void PairLJCutCoulLongIntel::init_style()
    pack_force_const(force_const_double, fix->get_double_buffers());
  else
    pack_force_const(force_const_single, fix->get_single_buffers());
+
+  _lrt = fix->lrt();
 }

 template <class flt_t, class acc_t>
--- a/src/USER-INTEL/pair_lj_cut_coul_long_intel.h
+++ b/src/USER-INTEL/pair_lj_cut_coul_long_intel.h
@ -42,7 +42,7 @@ class PairLJCutCoulLongIntel : public PairLJCutCoulLong {

 private:
  FixIntel *fix;
-  int _cop;
+  int _cop, _lrt;

  template <class flt_t> class ForceConst;
  template <class flt_t, class acc_t>
--- a/src/USER-INTEL/pppm_intel.cpp
+++ b/src/USER-INTEL/pppm_intel.cpp
@ -12,7 +12,8 @@
 ------------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------
-   Contributing author: Rodrigo Canales (RWTH Aachen University)
+   Contributing authors: Rodrigo Canales (RWTH Aachen University)
+                         W. Michael Brown (Intel)
 ------------------------------------------------------------------------- */

 #include <mpi.h>
@ -107,7 +108,14 @@ void PPPMIntel::compute(int eflag, int vflag)
    return;
  }
  #endif
+  compute_first(eflag,vflag);
+  compute_second(eflag,vflag);
+}

+/* ---------------------------------------------------------------------- */
+
+void PPPMIntel::compute_first(int eflag, int vflag)
+{
  int i,j;

  // set energy/virial flags
@ -192,6 +200,13 @@ void PPPMIntel::compute(int eflag, int vflag)
    else if (differentiation_flag == 0)
      cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMIntel::compute_second(int eflag, int vflag)
+{
+  int i,j;

  // calculate the force on my particles

@ -617,3 +632,38 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
    if (slabflag != 2) f[i].z += qfactor * ekz - fqqrd2es * sf;
  }
 }
+
+/* ----------------------------------------------------------------------
+  Pack data into intel package buffers if using LRT mode
+------------------------------------------------------------------------- */
+
+void PPPMIntel::pack_buffers()  // calls thr_pack on the fix's buffers; the whole call is timed under TIME_PACK
+{
+  fix->start_watch(TIME_PACK);
+  #if defined(_OPENMP)
+  #pragma omp parallel default(none)
+  #endif
+  {  // runs inside an OpenMP parallel region when _OPENMP is defined, otherwise once
+    int ifrom, ito, tid;  // presumably assigned by IP_PRE_omp_range_id_align (defined elsewhere) - confirm
+    IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal+atom->nghost,
+                              comm->nthreads, 
+                              sizeof(IntelBuffers<float,double>::atom_t));  // NOTE(review): size always taken from the <float,double> atom_t, whatever precision() is - confirm intended
+    if (fix->precision() == FixIntel::PREC_MODE_MIXED)  // choose the buffer object matching the fix's precision mode
+      fix->get_mixed_buffers()->thr_pack(ifrom,ito,1);  // meaning of the trailing 1 is defined by thr_pack (not shown here)
+    else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
+      fix->get_double_buffers()->thr_pack(ifrom,ito,1);
+    else  // neither MIXED nor DOUBLE: use the single-precision buffers
+      fix->get_single_buffers()->thr_pack(ifrom,ito,1);
+  }
+  fix->stop_watch(TIME_PACK);
+}
+
+/* ----------------------------------------------------------------------
+   Returns _use_base; presumably nonzero when Intel PPPM optimizations are skipped due to offload (confirm)
+------------------------------------------------------------------------- */
+
+#ifdef _LMP_INTEL_OFFLOAD
+int PPPMIntel::use_base() {  // accessor; only compiled when _LMP_INTEL_OFFLOAD is defined
+  return _use_base;  // value is set elsewhere (not visible in this diff)
+}
+#endif
--- a/src/USER-INTEL/pppm_intel.h
+++ b/src/USER-INTEL/pppm_intel.h
@ -12,7 +12,8 @@
 ------------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------
-   Contributing author: Rodrigo Canales (RWTH Aachen University)
+   Contributing authors: Rodrigo Canales (RWTH Aachen University)
+                         W. Michael Brown (Intel)
 ------------------------------------------------------------------------- */

 #ifdef KSPACE_CLASS
@ -35,6 +36,13 @@ class PPPMIntel : public PPPM {
  virtual ~PPPMIntel();
  virtual void init();
  virtual void compute(int, int);
+  void compute_first(int, int);
+  void compute_second(int, int);
+  void pack_buffers();
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  int use_base();
+  #endif

 protected:
  FixIntel *fix;