git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12678 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp 2014-11-05 16:35:50 +00:00
parent 52408b16d2
commit d586a4bbbb
5 changed files with 187 additions and 12 deletions

View File

@ -12,10 +12,11 @@ libfwrapper.c is the Fortran-to-C wrapper
The 3 codes do the same thing, so you can compare them to see how to
drive LAMMPS in this manner. The C driver is similar in spirit to what
one could use to write a scripting language interface. The Fortran
driver in addition requires a wrapper library that interfaces the C
interface of the LAMMPS library to Fortran and also translates the MPI
communicator from Fortran to C.
one could use to write a scripting language interface. See
python/examples/simple.py for an example of using Python as a wrapper
in that way. The Fortran driver in addition requires a wrapper
library that interfaces the C interface of the LAMMPS library to
Fortran and also translates the MPI communicator from Fortran to C.
First, you must build LAMMPS as a library, either static or shared.
See http://lammps.sandia.gov/doc/Section_start.html#start_5 for

View File

@ -206,13 +206,13 @@ void PPPMGPU::compute(int eflag, int vflag)
else evflag = evflag_atom = eflag_global = vflag_global =
eflag_atom = vflag_atom = 0;
// If need per-atom energies/virials, also do particle map on host
// concurrently with GPU calculations
// If need per-atom energies/virials, allocate per-atom arrays here
// so that particle map on host can be done concurrently with GPU calculations
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
peratom_allocate_flag = 1;
}
bool success = true;
@ -233,12 +233,19 @@ void PPPMGPU::compute(int eflag, int vflag)
domain->x2lamda(atom->nlocal);
}
// extend size of per-atom arrays if necessary
// If need per-atom energies/virials, also do particle map on host
// concurrently with GPU calculations
if (evflag_atom) {
// extend size of per-atom arrays if necessary
if (atom->nlocal > nmax) {
memory->destroy(part2grid);
nmax = atom->nmax;
memory->create(part2grid,nmax,3,"pppm:part2grid");
}
if (evflag_atom && atom->nlocal > nmax) {
memory->destroy(part2grid);
nmax = atom->nmax;
memory->create(part2grid,nmax,3,"pppm:part2grid");
particle_map();
}

View File

@ -0,0 +1,67 @@
If you run from a different directory, edit the read_data line of the
input script so that it points to the data.rhodo file, which is included
in the 'bench' directory of the LAMMPS distribution.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Expected times are the expected LOOP times based on runs using dual-socket
Intel Xeon processor E5-2697 V2 with Intel Xeon Phi coprocessor 7120P.
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
TO RUN WITHOUT A PACKAGE:
-----------------------------------------------------------------------------
mpirun -np 48 ../../../src/lmp_YOUR_MACHINE -in in.intel.rhodo -log none -v b 0 -v s off
mpirun -np 48 ../../../src/lmp_YOUR_MACHINE -in in.intel.lc -log none -v b 0 -v s off
TO RUN WITH OMP PACKAGE:
-----------------------------------------------------------------------------
env OMP_NUM_THREADS=2 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.rhodo -log none -v b 0 -v s omp
+++++++++++++++++++++++
+ EXPECTED TIME: 20.33s
+++++++++++++++++++++++
env OMP_NUM_THREADS=1 mpirun -np 48 ../../../src/lmp_YOUR_MACHINE -in in.intel.lc -log none -v b 0 -v s omp
+++++++++++++++++++++++
+ EXPECTED TIME: 19.92s
+++++++++++++++++++++++
TO RUN WITH INTEL+OMP PACKAGE WITHOUT OFFLOAD:
-----------------------------------------------------------------------------
env OMP_NUM_THREADS=2 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.rhodo -log none -v b 0 -v s intel
+++++++++++++++++++++++
+ EXPECTED TIME: 16.94s
+++++++++++++++++++++++
env OMP_NUM_THREADS=2 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.lc -log none -v b 0 -v s intel
+++++++++++++++++++++++
+ EXPECTED TIME: 5.80s
+++++++++++++++++++++++
TO RUN WITH INTEL+OMP PACKAGE WITH OFFLOAD TO XEON PHI (AUTO-BALANCED):
-----------------------------------------------------------------------------
env OMP_NUM_THREADS=1 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.rhodo -log none -v b -1 -v s intel
+++++++++++++++++++++++
+ EXPECTED TIME: 12.31s
+++++++++++++++++++++++
env OMP_NUM_THREADS=1 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.lc -log none -v b -1 -v s intel
+++++++++++++++++++++++
+ EXPECTED TIME: 4.00s
+++++++++++++++++++++++
TO RUN WITH INTEL+OMP PACKAGE WITH OFFLOAD TO XEON PHI (FIXED BALANCE):
-----------------------------------------------------------------------------
env OMP_NUM_THREADS=1 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.rhodo -log none -v b 0.68 -v s intel
+++++++++++++++++++++++
+ EXPECTED TIME: 11.40s
+++++++++++++++++++++++
env OMP_NUM_THREADS=1 mpirun -np 24 ../../../src/lmp_YOUR_MACHINE -in in.intel.lc -log none -v b 0.53 -v s intel
+++++++++++++++++++++++
+ EXPECTED TIME: 3.93s
+++++++++++++++++++++++

View File

@ -0,0 +1,61 @@
# Gay-Berne benchmark
# biaxial ellipsoid mesogens in isotropic phase
# shape: 2 1.5 1
# cutoff 4.0 with skin 0.8
# NPT, T=2.4, P=8.0
# accelerator settings
# variables b (balance value for the intel package) and s (style suffix)
# are not defined in this script, so the caller has to supply them,
# e.g. -v b 0 -v s intel on the command line
package intel 1 mode mixed balance $b
package omp 0
suffix $s
processors * * * grid numa
# problem size: x, y, z are index-style variables with values 4, 2, 2
# NOTE(review): presumably these can be overridden with -v on the command line - confirm
variable x index 4
variable y index 2
variable z index 2
# box extent along each axis is 32 times the corresponding multiplier
# (128 x 64 x 64 when x, y, z are 4, 2, 2)
variable i equal $x*32
variable j equal $y*32
variable k equal $z*32
units lj
atom_style ellipsoid
# creation
# atoms of type 1 are created on a simple-cubic lattice filling the box region
lattice sc 0.22
region box block 0 $i 0 $j 0 $k
create_box 1 box
create_atoms 1 box
# read_data data.gb
# per-type properties: mass 1.5, ellipsoid shape 1 1.5 2, random quaternions (seed 982381)
# NOTE(review): the header comment lists the shape as 2 1.5 1, the reverse order of the set command
set type 1 mass 1.5
set type 1 shape 1 1.5 2
set group all quat/random 982381
# temperature compute for aspherical particles; its extra setting is
# the number of atoms in group spheroid plus 3
compute rot all temp/asphere
group spheroid type 1
variable dof equal count(spheroid)+3
compute_modify rot extra ${dof}
# initial velocities generated for temperature 2.4 with seed 41787
velocity all create 2.4 41787 loop geom
# Gay-Berne pair interaction; the last pair_style argument (4.0) is the cutoff named in the header
pair_style gayberne 1.0 3.0 1.0 4.0
pair_coeff 1 1 1.0 1.0 1.0 0.5 0.2 1.0 0.5 0.2
# neighbor skin 0.8, as named in the header
neighbor 0.8 bin
timestep 0.002
thermo 300
# equilibration run
# NPT for aspherical particles at T=2.4 with isotropic pressure going from 5.0 to 8.0
# NOTE(review): 1_temp is presumably the temperature compute created by fix 1 - confirm
fix 1 all npt/asphere temp 2.4 2.4 0.1 iso 5.0 8.0 0.1
compute_modify 1_temp extra ${dof}
run 210
# after equilibration: print thermo every 100 steps, reset the step counter,
# replace the npt/asphere fix with nve/asphere, then run 10 steps followed by 50 steps
thermo 100
reset_timestep 0
unfix 1
fix 1 all nve/asphere
run 10
run 50

View File

@ -0,0 +1,39 @@
# Rhodopsin model
# accelerator settings
# variables b (balance value for the intel package) and s (style suffix)
# are not defined in this script, so the caller has to supply them,
# e.g. -v b 0 -v s intel on the command line
package intel 1 mode mixed balance $b
package omp 0
suffix $s
# replication counts used by the replicate command below: 4, 2, 2
# NOTE(review): presumably these can be overridden with -v on the command line - confirm
variable x index 4
variable y index 2
variable z index 2
units real
neigh_modify delay 5 every 1
# molecular system settings: full atom style with CHARMM-style bonded terms
atom_style full
atom_modify map hash
bond_style harmonic
angle_style charmm
dihedral_style charmm
improper_style harmonic
# CHARMM LJ with long-range Coulomb (arguments 8.0 10.0), PPPM kspace solver with 1e-4
pair_style lj/charmm/coul/long 8.0 10.0
pair_modify mix arithmetic
kspace_style pppm 1e-4
# the data file path is relative to the run directory; adjust it when running elsewhere
read_data ../../../bench/data.rhodo
# replicate the system that was read in by x, y, z in the three dimensions
replicate $x $y $z
# fix 1 applies SHAKE; fix 2 is NPT at temperature 300.0 with the pressure
# setting given for z only (the command continues onto the next line)
fix 1 all shake 0.0001 5 0 m 1.0 a 232
fix 2 all npt temp 300.0 300.0 100.0 &
z 0.0 0.0 1000.0 mtk no pchain 0 tchain 1
special_bonds charmm
# thermo output every 50 steps in multi-line format, timestep 2.0
thermo 50
thermo_style multi
timestep 2.0
# a 10-step run followed by a 100-step run
run 10
run 100