git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15243 f3b2605a-c512-4ea7-a41b-209d697bcdaa

2016-07-01 23:21:38 +00:00 · 2016-07-01 23:21:38 +00:00 · 78df7623b3
parent b4b30d65da
commit 78df7623b3
2 changed files with 25 additions and 19 deletions
--- a/src/USER-INTEL/TEST/README
+++ b/src/USER-INTEL/TEST/README
@@ -1,35 +1,36 @@
 #############################################################################
 # Benchmarks
 #
-# in.intel.lj -		Atomic fluid (LJ Benchmark)
-# in.intel.rhodo -	Protein (Rhodopsin Benchmark)
-# in.intel.lc -	 	Liquid Crystal w/ Gay-Berne potential
-# in.intel.sw -		Silicon benchmark with Stillinger-Weber
-# in.intel.tersoff -	Silicon benchmark with Tersoff
-# in.intel.water - 	Coarse-grain water benchmark using Stillinger-Weber
+# in.intel.lj -	        Atomic fluid (LJ Benchmark)
+# in.intel.rhodo -      Protein (Rhodopsin Benchmark)
+# in.intel.lc -	        Liquid Crystal w/ Gay-Berne potential
+# in.intel.sw -	        Silicon benchmark with Stillinger-Weber
+# in.intel.tersoff -    Silicon benchmark with Tersoff
+# in.intel.water -      Coarse-grain water benchmark using Stillinger-Weber
 #
 #############################################################################

 #############################################################################
-# Expected Timesteps/second on E5-2697v3 with turbo on and HT enabled
+# Expected Timesteps/second with turbo on and HT enabled, LAMMPS 18-Jun-2016
 #
-# in.intel.lj -		131.943
-# in.intel.rhodo -	8.661
-# in.intel.lc -	 	14.015
-# in.intel.sw -		103.53
-# in.intel.tersoff -	55.525
-# in.intel.water - 	44.079
+#                     Xeon E5-2697v4     Xeon Phi 7250
+#                    
+# in.intel.lj -          162.764             179.148
+# in.intel.rhodo -        11.633              13.668
+# in.intel.lc -	          19.136              24.863
+# in.intel.sw -	         139.048             152.026
+# in.intel.tersoff -      82.663              92.985
+# in.intel.water -        59.838              85.704
 #
 #############################################################################

 #############################################################################
-# For Haswell and Broadwell architectures, depending on the compiler version, 
+# For Haswell (Xeon v3) architectures, depending on the compiler version, 
 # it may give better performance to compile for an AVX target (with -xAVX 
 # compiler option) instead of -xHost or -xCORE-AVX2 for some of the 
-# workloads due to inefficient code generation for gathers. Aside from
-# Tersoff, this will not significantly impact performance because FMA 
-# sensitive routines will still use AVX2 (MKL and SVML detect the processor 
-# at runtime)
+# workloads. In most cases, FMA-sensitive routines will still use AVX2
+# (MKL and SVML detect the processor at runtime). For Broadwell (Xeon v4)
+# architectures, -xCORE-AVX2 or -xHost will work best for all workloads.
 #############################################################################

 #############################################################################
@@ -86,3 +87,8 @@ mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk intel 0 -sf intel
 # To run with USER-INTEL and automatic load balancing to 1 coprocessor
 #############################################################################
 mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk intel 1 -sf intel
+
+#############################################################################
+# If using PPPM (in.intel.rhodo) on Intel Xeon Phi x200 series processors
+#############################################################################
+mpirun -np $LMP_CORES $LMP_BIN -in $bench -log none -pk intel 0 omp 3 lrt yes -sf intel
--- a/src/USER-INTEL/TEST/in.intel.rhodo
+++ b/src/USER-INTEL/TEST/in.intel.rhodo
@@ -7,7 +7,7 @@ variable	n index 0	# Use NUMA Mapping for Multi-Node
 variable        b index 3       # Neighbor binsize
 variable	p index 0	# Use Power Measurement
 variable	c index 0	# 1 to use collectives for PPPM
-variable        d index 0       # 1 to use 'diff ad' for PPPM
+variable        d index 1       # 1 to use 'diff ad' for PPPM

 variable	x index 4
 variable	y index 2