git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12678 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2014-11-05 16:35:50 +00:00 · 2014-11-05 16:35:50 +00:00 · d586a4bbbb
parent 52408b16d2
commit d586a4bbbb
5 changed files with 187 additions and 12 deletions
--- a/examples/COUPLE/simple/README
+++ b/examples/COUPLE/simple/README
@ -12,10 +12,11 @@ libfwrapper.c  is the Fortran-to-C wrapper

 The 3 codes do the same thing, so you can compare them to see how to
 drive LAMMPS in this manner. The C driver is similar in spirit to what
-one could use to write a scripting language interface.  The Fortran
-driver in addition requires a wrapper library that interfaces the C
-interface of the LAMMPS library to Fortran and also translates the MPI
-communicator from Fortran to C.
+one could use to write a scripting language interface.  See
+python/examples/simple.py for an example of using Python as a wrapper
+in that way.  The Fortran driver in addition requires a wrapper
+library that interfaces the C interface of the LAMMPS library to
+Fortran and also translates the MPI communicator from Fortran to C.

 First, you must build LAMMPS as a library, either static or shared.
 See http://lammps.sandia.gov/doc/Section_start.html#start_5 for
--- a/src/GPU/pppm_gpu.cpp
+++ b/src/GPU/pppm_gpu.cpp
@ -206,13 +206,13 @@ void PPPMGPU::compute(int eflag, int vflag)
  else evflag = evflag_atom = eflag_global = vflag_global = 
        eflag_atom = vflag_atom = 0;

-  // If need per-atom energies/virials, also do particle map on host
-  // concurrently with GPU calculations
+  // If per-atom energies/virials are needed, allocate per-atom arrays here
+  // so that the particle map on the host can be done concurrently with GPU calculations
+
  if (evflag_atom && !peratom_allocate_flag) {
    allocate_peratom();
    cg_peratom->ghost_notify();
    cg_peratom->setup();
-    peratom_allocate_flag = 1;
  }

  bool success = true;
@ -233,12 +233,19 @@ void PPPMGPU::compute(int eflag, int vflag)
    domain->x2lamda(atom->nlocal);
  }

-  // extend size of per-atom arrays if necessary
+  // If per-atom energies/virials are needed, also do the particle map on the
+  // host concurrently with GPU calculations
+
+  if (evflag_atom) {
+
+    // extend size of per-atom arrays if necessary
+
+    if (atom->nlocal > nmax) {
+      memory->destroy(part2grid);
+      nmax = atom->nmax;
+      memory->create(part2grid,nmax,3,"pppm:part2grid");
+    }

-  if (evflag_atom && atom->nlocal > nmax) {
-    memory->destroy(part2grid);
-    nmax = atom->nmax;
-    memory->create(part2grid,nmax,3,"pppm:part2grid");
    particle_map();
  }

--- a/src/USER-INTEL/TEST/README
+++ b/src/USER-INTEL/TEST/README
@ -0,0 +1,67 @@
+If you run from a directory other than this one, edit the read_data line in
+in.intel.rhodo to point to the data.rhodo file, which is included in the
+'bench' directory of the LAMMPS distribution.
+
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+Expected times are the LOOP times from runs on a dual-socket
+Intel Xeon processor E5-2697 V2 with an Intel Xeon Phi coprocessor 7120P.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+
+TO RUN WITHOUT A PACKAGE:
+-----------------------------------------------------------------------------
+mpirun -np 48 ../../../src/lmp_YOUR_MACHINE -in in.intel.rhodo -log none -v b 0 -v s off
+mpirun -np 48 ../../../src/lmp_YOUR_MACHINE -in in.intel.lc -log none -v b 0 -v s off
+
+
+TO RUN WITH OMP PACKAGE:
+-----------------------------------------------------------------------------
+env OMP_NUM_THREADS=2 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.rhodo -log none -v b 0 -v s omp
+++++++++++++++++++++++
+ EXPECTED TIME: 20.33s
+++++++++++++++++++++++
+
+env OMP_NUM_THREADS=1 mpirun -np 48 ../../../src/lmp_YOUR_MACHINE -in in.intel.lc -log none -v b 0 -v s omp
+++++++++++++++++++++++
+ EXPECTED TIME: 19.92s
+++++++++++++++++++++++
+
+
+TO RUN WITH INTEL+OMP PACKAGE WITHOUT OFFLOAD:
+-----------------------------------------------------------------------------
+env OMP_NUM_THREADS=2 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.rhodo -log none -v b 0 -v s intel
+++++++++++++++++++++++
+ EXPECTED TIME: 16.94s
+++++++++++++++++++++++
+
+env OMP_NUM_THREADS=2 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.lc -log none -v b 0 -v s intel
+++++++++++++++++++++++
+ EXPECTED TIME: 5.80s
+++++++++++++++++++++++
+
+
+TO RUN WITH INTEL+OMP PACKAGE WITH OFFLOAD TO XEON PHI (AUTO-BALANCED):
+-----------------------------------------------------------------------------
+env OMP_NUM_THREADS=1 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.rhodo -log none -v b -1 -v s intel
+++++++++++++++++++++++
+ EXPECTED TIME: 12.31s
+++++++++++++++++++++++
+
+env OMP_NUM_THREADS=1 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.lc -log none -v b -1 -v s intel
+++++++++++++++++++++++
+ EXPECTED TIME: 4.00s
+++++++++++++++++++++++
+
+
+TO RUN WITH INTEL+OMP PACKAGE WITH OFFLOAD TO XEON PHI (FIXED BALANCE):
+-----------------------------------------------------------------------------
+env OMP_NUM_THREADS=1 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.rhodo -log none -v b 0.68 -v s intel
+++++++++++++++++++++++
+ EXPECTED TIME: 11.40s
+++++++++++++++++++++++
+
+env OMP_NUM_THREADS=1 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.lc -log none -v b 0.53 -v s intel
+++++++++++++++++++++++
+ EXPECTED TIME: 3.93s
+++++++++++++++++++++++
+
--- a/src/USER-INTEL/TEST/in.intel.lc
+++ b/src/USER-INTEL/TEST/in.intel.lc
@ -0,0 +1,61 @@
+# Gay-Berne benchmark
+# biaxial ellipsoid mesogens in isotropic phase
+# shape: 2 1.5 1
+# cutoff 4.0 with skin 0.8
+# NPT, T=2.4, P=8.0
+
+package intel 1 mode mixed balance $b
+package omp 0
+suffix $s
+processors * * * grid numa
+
+variable        x index 4
+variable        y index 2
+variable        z index 2
+
+variable i equal $x*32
+variable j equal $y*32
+variable k equal $z*32
+
+units	      lj
+atom_style    ellipsoid
+
+# creation
+lattice	      sc 0.22
+region	      box block 0 $i 0 $j 0 $k
+create_box    1 box
+create_atoms  1 box
+
+# read_data     data.gb
+
+set type 1 mass 1.5
+set type 1 shape 1 1.5 2
+set	      group all quat/random 982381
+
+compute	      rot all temp/asphere
+group	      spheroid type 1
+variable      dof equal count(spheroid)+3
+compute_modify rot extra ${dof}
+
+velocity      all create 2.4 41787 loop geom
+
+pair_style    gayberne 1.0 3.0 1.0 4.0
+pair_coeff    1 1 1.0 1.0 1.0 0.5 0.2 1.0 0.5 0.2
+
+neighbor      0.8 bin
+
+timestep      0.002
+thermo	      300
+
+# equilibration run
+fix	      1 all npt/asphere temp 2.4 2.4 0.1 iso 5.0 8.0 0.1
+compute_modify 1_temp extra ${dof}
+run	      210
+thermo        100
+
+reset_timestep 0
+unfix 1
+fix         1 all nve/asphere
+run 10
+run	      50
+
--- a/src/USER-INTEL/TEST/in.intel.rhodo
+++ b/src/USER-INTEL/TEST/in.intel.rhodo
@ -0,0 +1,39 @@
+# Rhodopsin model
+
+package intel 1 mode mixed balance $b
+package omp 0
+suffix $s
+
+variable	x index 4
+variable	y index 2
+variable	z index 2
+
+units           real  
+neigh_modify    delay 5 every 1   
+
+atom_style      full  
+atom_modify	map hash
+bond_style      harmonic 
+angle_style     charmm 
+dihedral_style  charmm 
+improper_style  harmonic 
+pair_style      lj/charmm/coul/long 8.0 10.0 
+pair_modify     mix arithmetic 
+kspace_style    pppm 1e-4 
+
+read_data       ../../../bench/data.rhodo
+
+replicate	$x $y $z
+
+fix             1 all shake 0.0001 5 0 m 1.0 a 232
+fix             2 all npt temp 300.0 300.0 100.0 &
+		z 0.0 0.0 1000.0 mtk no pchain 0 tchain 1
+
+special_bonds   charmm
+ 
+thermo          50
+thermo_style    multi 
+timestep        2.0
+
+run 10
+run		100