add fix mscg command, example, lib

2017-01-09 13:36:40 -07:00 · 2017-01-09 13:36:40 -07:00 · c31f1e9f22
parent 51fa33a407
commit c31f1e9f22
42 changed files with 23294 additions and 125 deletions
--- a/doc/src/Manual.txt
+++ b/doc/src/Manual.txt
@ -1,7 +1,7 @@
 <!-- HTML_ONLY -->
 <HEAD>
 <TITLE>LAMMPS Users Manual</TITLE>
-<META NAME="docnumber" CONTENT="6 Jan 2017 version">
+<META NAME="docnumber" CONTENT="9 Jan 2017 version">
 <META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
 <META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation.  This software and manual is distributed under the GNU General Public License.">
 </HEAD>
@ -21,7 +21,7 @@
 <H1></H1>

 LAMMPS Documentation :c,h3
-6 Jan 2017 version :c,h4
+9 Jan 2017 version :c,h4

 Version info: :h4

--- a/doc/src/Section_commands.txt
+++ b/doc/src/Section_commands.txt
@ -583,6 +583,7 @@ USER-INTEL, k = KOKKOS, o = USER-OMP, t = OPT.
 "lineforce"_fix_lineforce.html,
 "momentum (k)"_fix_momentum.html,
 "move"_fix_move.html,
+"mscg"_fix_mscg.html,
 "msst"_fix_msst.html,
 "neb"_fix_neb.html,
 "nph (ko)"_fix_nh.html,
@ -918,7 +919,7 @@ KOKKOS, o = USER-OMP, t = OPT.
 "dpd (go)"_pair_dpd.html,
 "dpd/tstat (go)"_pair_dpd.html,
 "dsmc"_pair_dsmc.html,
-"eam (gkot)"_pair_eam.html,
+"eam (gkiot)"_pair_eam.html,
 "eam/alloy (gkot)"_pair_eam.html,
 "eam/fs (gkot)"_pair_eam.html,
 "eim (o)"_pair_eim.html,
--- a/doc/src/accelerate_intel.txt
+++ b/doc/src/accelerate_intel.txt
@ -29,7 +29,7 @@ Bond Styles: fene, harmonic :l
 Dihedral Styles: charmm, harmonic, opls :l
 Fixes: nve, npt, nvt, nvt/sllod :l
 Improper Styles: cvff, harmonic :l
-Pair Styles: buck/coul/cut, buck/coul/long, buck, gayberne,
+Pair Styles: buck/coul/cut, buck/coul/long, buck, eam, gayberne,
 charmm/coul/long, lj/cut, lj/cut/coul/long, sw, tersoff :l
 K-Space Styles: pppm :l
 :ule
--- a/doc/src/fix_mscg.txt
+++ b/doc/src/fix_mscg.txt
@ -0,0 +1,130 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+fix mscg command :h3
+
+[Syntax:]
+
+fix ID group-ID mscg N keyword args ... :pre
+
+ID, group-ID are documented in "fix"_fix.html command :ulb,l
+mscg = style name of this fix command :l
+N = envoke this fix every this many timesteps :l
+zero or more keyword/value pairs may be appended :l
+keyword = {range} or {name} or {max} :l
+  {range} arg = {on} or {off}
+    {on} = range finding functionality is performed
+    {off} = force matching functionality is performed
+  {name} args = name1 ... nameN
+    name1,...,nameN = string names for each atom type (1-Ntype)
+  {max} args = maxb maxa maxd
+    maxb,maxa,maxd = maximum bonds/angles/dihedrals per atom :pre
+:ule
+
+[Examples:]
+
+fix 1 all mscg 1
+fix 1 all mscg 1 range name A B
+fix 1 all mscg 1 max 4 8 20 :pre
+
+[Description:]
+
+This fix applies the Multi-Scale Coarse-Graining (MSCG) method to
+snapshots from a dump file to generate potentials for coarse-grained
+simulations from all-atom simulations, using a force-matching
+technique ("Izvekov"_#Izvekov, "Noid"_#Noid).
+
+It makes use of the MS-CG library, written and maintained by Greg
+Voth's group at the University of Chicago, which is freely available
+on their "MS-CG GitHub
+site"_https://github.com/uchicago-voth/MSCG-release.  See instructions
+on obtaining and installing the MS-CG library in the src/MSCG/README
+file, which must be done before you build LAMMPS with this fix command
+and use the command in a LAMMPS input script.
+
+An example script using this fix is provided the examples/mscg
+directory.
+
+The general workflow for using LAMMPS in conjunction with the MS-CG
+library to create a coarse-grained model and run coarse-grained
+simulations is as follows:
+
+Perform all-atom simulations on the system to be coarse grained.
+Generate a trajectory mapped to the coarse-grained model.
+Create input files for the MS-CG library.
+Run the range finder functionality of the MS-CG library.  
+Run the force matching functionality of the MS-CG library.
+Check the results of the force matching.
+Run coarse-grained simulations using the new coarse-grained potentials. :ol
+
+This fix can perform the range finding and force matching steps 4 and
+5 of the above workflow when used in conjunction with the
+"rerun"_rerun.html command.  It does not perform steps 1-3 and 6-7.
+
+Step 2 can be performed using a Python script (what is the name?)
+provided with the MS-CG library which defines the coarse-grained model
+and converts a standard LAMMPS dump file for an all-atom simulation
+(step 1) into a LAMMPS dump file which has the positions of and forces
+on the coarse-grained beads.  
+
+In step 3, an input file named "control.in" is needed by the MS-CG
+library which sets parameters for the range finding and force matching
+functionalities.  See the examples/mscg/control.in file as an example.
+And see the documentation provided with the MS-CG library for more
+info on this file.
+
+When this fix is used to perform steps 4 and 5, the MS-CG library also
+produces additional output files.  The range finder functionality
+(step 4) outputs files defining pair and bonded interaction ranges.
+The force matching functionality (step 5) outputs tabulated force
+files for every interaction in the system. Other diagnostic files can
+also be output depending on the paramters in the MS-CG library input
+script.  Again, see the documentation provided with the MS-CG library
+for more info.
+
+:line
+
+The {range} keyword specifies which MS-CG library functionality should
+be invoked. If {on}, the step 4 range finder functionality is invoked.
+{off}, the step 5 force matching functionality is invoked.
+
+If the {name} keyword is used, string names are defined to associate
+with the integer atom types in LAMMPS.  {Ntype} names must be
+provided, one for each atom type (1-Ntype).
+
+The {max} keyword specifies the maximum number of bonds, angles, and
+dihedrals a bead can have in the coarse-grained model.
+
+[Restrictions:]
+
+This fix is part of the MSCG package. It is only enabled if LAMMPS was
+built with that package.  See the "Making
+LAMMPS"_Section_start.html#start_3 section for more info.
+
+The MS-CG library uses C++11, which may not be supported by older
+compilers. The MS-CG library also has some additional numeric library
+dependencies, which are describd in its documentation.
+
+Currently, the MS-CG library is not setup to run in parallel with MPI,
+so this fix can only be used in a serial LAMMPS build and run
+on a single processor.
+
+[Related commands:] none
+
+[Default:]
+
+The default keyword settings are range off, max 4 12 36.
+
+:line
+
+:link(Izvekov)
+[(Izvekov)] Izvekov, Voth, J Chem Phys 123, 134105 (2005).
+
+:link(Noid)
+[(Noid)] Noid, Chu, Ayton, Krishna, Izvekov, Voth, Das, Andersen, J
+Chem Phys 128, 134105 (2008).
--- a/doc/src/fixes.txt
+++ b/doc/src/fixes.txt
@ -68,6 +68,7 @@ Fixes :h1
   fix_meso_stationary
   fix_momentum
   fix_move
+   fix_mscg
   fix_msst
   fix_neb
   fix_nh
--- a/doc/src/lammps.book
+++ b/doc/src/lammps.book
@ -192,6 +192,7 @@ fix_meso.html
 fix_meso_stationary.html
 fix_momentum.html
 fix_move.html
+fix_mscg.html
 fix_msst.html
 fix_neb.html
 fix_nh.html
--- a/doc/src/pair_eam.txt
+++ b/doc/src/pair_eam.txt
@ -8,6 +8,7 @@

 pair_style eam command :h3
 pair_style eam/gpu command :h3
+pair_style eam/intel command :h3
 pair_style eam/kk command :h3
 pair_style eam/omp command :h3
 pair_style eam/opt command :h3
--- a/doc/src/read_dump.txt
+++ b/doc/src/read_dump.txt
@ -15,11 +15,12 @@ read_dump file Nstep field1 field2 ... keyword values ... :pre
 file = name of dump file to read :ulb,l
 Nstep = snapshot timestep to read from file :l
 one or more fields may be appended :l
-field = {x} or {y} or {z} or {vx} or {vy} or {vz} or {q} or {ix} or {iy} or {iz}
+field = {x} or {y} or {z} or {vx} or {vy} or {vz} or {q} or {ix} or {iy} or {iz} or {fx} or {fy} or {fz}
  {x},{y},{z} = atom coordinates
  {vx},{vy},{vz} = velocity components
  {q} = charge
-  {ix},{iy},{iz} = image flags in each dimension :pre
+  {ix},{iy},{iz} = image flags in each dimension
+  {fx},{fy},{fz} = force components :pre
 zero or more keyword/value pairs may be appended :l
 keyword = {box} or {replace} or {purge} or {trim} or {add} or {label} or {scaled} or {wrapped} or {format} :l
  {box} value = {yes} or {no} = replace simulation box with dump box
--- a/examples/README
+++ b/examples/README
@ -82,6 +82,7 @@ meam:	  MEAM test for SiC and shear (same as shear examples)
 melt:	  rapid melt of 3d LJ system
 micelle:  self-assembly of small lipid-like molecules into 2d bilayers
 min:	  energy minimization of 2d LJ melt
+mscg:     parameterize a multi-scale coarse-graining (MSCG) model
 msst:	  MSST shock dynamics
 nb3b:     use of nonbonded 3-body harmonic pair style
 neb:	  nudged elastic band (NEB) calculation for barrier finding
--- a/examples/mscg/README
+++ b/examples/mscg/README
@ -0,0 +1,10 @@
+Running this example requires that LAMMPS be built with the MSCG
+package and its fix mscg command.  The fix uses the Multi-Scale
+Coarse-Graining (MS-CG) library, freely available at
+https://github.com/uchicago-voth/MSCG-release, to compute optimized
+coarse-grained force field parameters.  The MS-CG library was
+developed by Jacob Wagner in Greg Voth's group at the University of
+Chicago.
+
+See the lib/mscg/README file for instructions on how to download and
+install the MS-CG library for use with LAMMPS.
--- a/examples/mscg/control.in
+++ b/examples/mscg/control.in
@ -0,0 +1,12 @@
+block_size 1
+start_frame 1
+n_frames 19
+nonbonded_cutoff 10.0
+basis_type 0
+primary_output_style 0
+output_solution_flag 1
+output_spline_coeffs_flag 1
+pair_nonbonded_bspline_basis_order 6
+pair_nonbonded_basis_set_resolution 0.7
+pair_nonbonded_output_binwidth 0.1
+matrix_type 0
--- a/examples/mscg/data.meoh
+++ b/examples/mscg/data.meoh
--- a/examples/mscg/dump.meoh
+++ b/examples/mscg/dump.meoh
--- a/examples/mscg/in.mscg
+++ b/examples/mscg/in.mscg
@ -0,0 +1,22 @@
+units real
+atom_style full
+pair_style zero 10.0
+
+read_data data.meoh
+pair_coeff * *
+
+thermo 1
+thermo_style custom step
+
+# Test 1a: range finder functionality
+fix 1 all mscg 1 range on
+rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz
+print "TEST_1a mscg range finder"
+unfix 1
+
+# Test 1b: force matching functionality
+fix 1 all mscg 1
+rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz
+print "TEST_1b mscg force matching"
+
+print TEST_DONE
--- a/examples/mscg/output_9Jan17/1_1.dat
+++ b/examples/mscg/output_9Jan17/1_1.dat
@ -0,0 +1,77 @@
+2.500000 5.670970817963099e+02
+2.600000 2.404059283529051e+02
+2.700000 9.157060823529977e+01
+2.800000 3.428273061369140e+01
+2.900000 1.619868149395266e+01
+3.000000 1.039607214301755e+01
+3.100000 6.830187514267188e+00
+3.200000 3.861970842349535e+00
+3.300000 1.645948643278161e+00
+3.400000 2.395428971623918e-01
+3.500000 -4.276763637833773e-01
+3.600000 -5.132022977965877e-01
+3.700000 -2.208024961234051e-01
+3.800000 2.402697744243800e-01
+3.900000 6.956064296165573e-01
+4.000000 1.034070044257954e+00
+4.100000 1.205997975111669e+00
+4.200000 1.209501102128581e+00
+4.300000 1.076304670380924e+00
+4.400000 8.575891319958883e-01
+4.500000 6.098309880892070e-01
+4.600000 3.807992942746473e-01
+4.700000 1.995994191469442e-01
+4.800000 7.699059877424269e-02
+4.900000 9.750744163981299e-03
+5.000000 -1.480308769532222e-02
+5.100000 -1.429422279228416e-02
+5.200000 -6.765899050869768e-03
+5.300000 -6.214398421078919e-03
+5.400000 -1.951586041390797e-02
+5.500000 -4.689090237947263e-02
+5.600000 -8.376292122940529e-02
+5.700000 -1.226699982917263e-01
+5.800000 -1.551768041657136e-01
+5.900000 -1.737865035767736e-01
+6.000000 -1.738272491408507e-01
+6.100000 -1.546779867768825e-01
+6.200000 -1.193171291488982e-01
+6.300000 -7.321054075616322e-02
+6.400000 -2.317411193286228e-02
+6.500000 2.376366715221714e-02
+6.600000 6.149913249600215e-02
+6.700000 8.597538938112201e-02
+6.800000 9.590170060736655e-02
+6.900000 9.245100462148878e-02
+7.000000 7.855487875847664e-02
+7.100000 5.818301960249692e-02
+7.200000 3.562272334783877e-02
+7.300000 1.475836615985744e-02
+7.400000 -1.639617536128255e-03
+7.500000 -1.237881063914745e-02
+7.600000 -1.768202571195587e-02
+7.700000 -1.877757119362295e-02
+7.800000 -1.748001968416543e-02
+7.900000 -1.577097622918088e-02
+8.000000 -1.537984660448136e-02
+8.100000 -1.737044400054951e-02
+8.200000 -2.187939410237979e-02
+8.300000 -2.823987455760605e-02
+8.400000 -3.525715284001425e-02
+8.500000 -4.148996251287761e-02
+8.600000 -4.553187949229211e-02
+8.700000 -4.629269831051163e-02
+8.800000 -4.327548798226762e-02
+8.900000 -3.674131754868225e-02
+9.000000 -2.758883541814894e-02
+9.100000 -1.712151838480657e-02
+9.200000 -6.810600249997737e-03
+9.300000 1.941999556272785e-03
+9.400000 8.040747353879739e-03
+9.500000 1.092691524686838e-02
+9.600000 1.063606620723048e-02
+9.700000 7.416550438142138e-03
+9.800000 1.175066786686231e-03
+9.900000 -9.084427187675534e-03
+10.000000 -2.582180514463068e-02
+10.100000 -5.352186189454393e-02
--- a/examples/mscg/output_9Jan17/1_1.table
+++ b/examples/mscg/output_9Jan17/1_1.table
@ -0,0 +1,82 @@
+# Header information on force file
+
+1_1
+N 77 R 2.500000 10.100000
+
+1 2.500000 69.428523 567.097082
+2 2.600000 29.053372 240.405928
+3 2.700000 12.454545 91.570608
+4 2.800000 6.161878 34.282731
+5 2.900000 3.637808 16.198681
+6 3.000000 2.308070 10.396072
+7 3.100000 1.446757 6.830188
+8 3.200000 0.912149 3.861971
+9 3.300000 0.636753 1.645949
+10 3.400000 0.542478 0.239543
+11 3.500000 0.551885 -0.427676
+12 3.600000 0.598929 -0.513202
+13 3.700000 0.635629 -0.220802
+14 3.800000 0.634656 0.240270
+15 3.900000 0.587862 0.695606
+16 4.000000 0.501378 1.034070
+17 4.100000 0.389375 1.205998
+18 4.200000 0.268600 1.209501
+19 4.300000 0.154310 1.076305
+20 4.400000 0.057615 0.857589
+21 4.500000 -0.015756 0.609831
+22 4.600000 -0.065288 0.380799
+23 4.700000 -0.094307 0.199599
+24 4.800000 -0.108137 0.076991
+25 4.900000 -0.112474 0.009751
+26 5.000000 -0.112221 -0.014803
+27 5.100000 -0.110767 -0.014294
+28 5.200000 -0.109714 -0.006766
+29 5.300000 -0.109065 -0.006214
+30 5.400000 -0.107778 -0.019516
+31 5.500000 -0.104458 -0.046891
+32 5.600000 -0.097925 -0.083763
+33 5.700000 -0.087603 -0.122670
+34 5.800000 -0.073711 -0.155177
+35 5.900000 -0.057263 -0.173787
+36 6.000000 -0.039882 -0.173827
+37 6.100000 -0.023457 -0.154678
+38 6.200000 -0.009757 -0.119317
+39 6.300000 -0.000131 -0.073211
+40 6.400000 0.004688 -0.023174
+41 6.500000 0.004659 0.023764
+42 6.600000 0.000396 0.061499
+43 6.700000 -0.006978 0.085975
+44 6.800000 -0.016072 0.095902
+45 6.900000 -0.025489 0.092451
+46 7.000000 -0.034040 0.078555
+47 7.100000 -0.040877 0.058183
+48 7.200000 -0.045567 0.035623
+49 7.300000 -0.048086 0.014758
+50 7.400000 -0.048742 -0.001640
+51 7.500000 -0.048041 -0.012379
+52 7.600000 -0.046538 -0.017682
+53 7.700000 -0.044715 -0.018778
+54 7.800000 -0.042902 -0.017480
+55 7.900000 -0.041239 -0.015771
+56 8.000000 -0.039682 -0.015380
+57 8.100000 -0.038044 -0.017370
+58 8.200000 -0.036082 -0.021879
+59 8.300000 -0.033576 -0.028240
+60 8.400000 -0.030401 -0.035257
+61 8.500000 -0.026564 -0.041490
+62 8.600000 -0.022213 -0.045532
+63 8.700000 -0.017621 -0.046293
+64 8.800000 -0.013143 -0.043275
+65 8.900000 -0.009142 -0.036741
+66 9.000000 -0.005926 -0.027589
+67 9.100000 -0.003690 -0.017122
+68 9.200000 -0.002494 -0.006811
+69 9.300000 -0.002250 0.001942
+70 9.400000 -0.002749 0.008041
+71 9.500000 -0.003698 0.010927
+72 9.600000 -0.004776 0.010636
+73 9.700000 -0.005678 0.007417
+74 9.800000 -0.006108 0.001175
+75 9.900000 -0.005712 -0.009084
+76 10.000000 -0.003967 -0.025822
+77 10.100000 0.000000 -0.053522
--- a/examples/mscg/output_9Jan17/b-spline.out
+++ b/examples/mscg/output_9Jan17/b-spline.out
@ -0,0 +1,2 @@
+n: 1 1 6 12 2.400000000000002e+00 1.010000000000000e+01
+1.200460787805587e+03 2.169623423326193e+01 2.388396964379328e+01 -1.197754948555067e+01 6.472482422420378e+00 -1.483711824891365e+00 7.768139601662113e-01 -7.869494711740244e-01 4.830820182054661e-01 -1.892989444995645e-01 1.021275453070386e-01 -1.637649039972671e-01 5.570978712841167e-02 7.637188693695119e-03 -4.109175461195019e-03 -5.352186189455146e-02 
--- a/examples/mscg/output_9Jan17/rmin.in
+++ b/examples/mscg/output_9Jan17/rmin.in
@ -0,0 +1 @@
+1 1 2.852369 10.000000 fm
--- a/examples/mscg/output_9Jan17/rmin_b.in
+++ b/examples/mscg/output_9Jan17/rmin_b.in
--- a/examples/mscg/output_9Jan17/sol_info.out
+++ b/examples/mscg/output_9Jan17/sol_info.out
@ -0,0 +1,18 @@
+fm_matrix_rows:3000; fm_matrix_columns:16;
+Singular vector:
+2.442317e+00
+2.105009e+00
+1.433251e+00
+1.184602e+00
+9.739627e-01
+6.944898e-01
+5.376709e-01
+4.616070e-01
+3.257062e-01
+2.683729e-01
+1.530153e-01
+9.336288e-02
+5.042150e-02
+2.126912e-02
+1.446682e-02
+4.167763e-05
--- a/examples/mscg/output_9Jan17/x.out
+++ b/examples/mscg/output_9Jan17/x.out
@ -0,0 +1 @@
+‘-ÂØ×Á’@47h<²5@¸¿¦ÕKâ7@ÑR½]<5D>ô'Àéë nÒã@ÝŒIœH½÷¿¯?ó¨Ûè?r€I¨°.é¿š^×ÐêÞ?Wå£ò:È¿(Oã%º?ËNs•?öÄ¿<C384>:‘Cþ…¬?<3F>ËÃ:,H?à}‚cÈÔp¿£<C2BF>ê¬7g«¿
--- a/examples/voronoi/README
+++ b/examples/voronoi/README
@ -0,0 +1,10 @@
+Running this example requires that LAMMPS be built with the VORONOI
+package and its compute voronoi command.  The compute uses the Voro++
+library, freely available at http://math.lbl.gov/voro++, to compute
+the Voronoi tesselation locally on each processor.  Voro++ was
+developed by Chris H. Rycroft while at UC Berkeley / Lawrence Berkeley
+Laboratory.
+
+See the lib/voronoi/README file for instructions on how to download
+and install the Voro++ library for use with LAMMPS.
+
--- a/lib/README
+++ b/lib/README
@ -39,6 +39,8 @@ meam	      modified embedded atom method (MEAM) potential, MEAM package
                from Greg Wagner (Sandia)
 molfile       hooks to VMD molfile plugins, used by the USER-MOLFILE package
                from Axel Kohlmeyer (Temple U) and the VMD development team
+mscg          hooks to the MSCG library, used by fix_mscg command
+                from Jacob Wagner and Greg Voth group (U Chicago)
 python        hooks to the system Python library, used by the PYTHON package
                from the LAMMPS development team
 qmmm	      quantum mechanics/molecular mechanics coupling interface
--- a/lib/mscg/Makefile.lammps
+++ b/lib/mscg/Makefile.lammps
@ -0,0 +1,5 @@
+# Settings that the LAMMPS build will import when this package library is used
+
+mscg_SYSINC = 
+mscg_SYSLIB = -lm -lgsl -llapack -lcblas
+mscg_SYSPATH = 
--- a/lib/mscg/README
+++ b/lib/mscg/README
@ -0,0 +1,53 @@
+This directory contains links to the Multi-scale Coarse-graining
+(MS-CG) library which is required to use the MSCG package and its fix
+command in a LAMMPS input script.
+
+The MS-CG library is available at
+https://github.com/uchicago-voth/MSCG-release and was developed by
+Jacob Wagner in Greg Voth's group at the University of Chicago.
+
+-----------------
+
+You must perform the following steps yourself.
+
+1.  Download MS-CG at https://github.com/uchicago-voth/MSCG-release
+    either as a tarball or via SVN, and unpack the tarball either in
+    this /lib/mscg directory or somewhere else on your system.
+
+2.  Compile MS-CG from within its home directory using your makefile choice:
+    % make -f Makefile."name" libmscg.a
+
+3.  There is no need to install MS-CG if you only wish 
+    to use it from LAMMPS.
+
+4.  Create two soft links in this dir (lib/mscg) to the MS-CG src
+    directory.  E.g if you built MS-CG in this dir:
+      % ln -s mscgfm-master/src includelink
+      % ln -s mscgfm-master/src liblink
+    These links could instead be set to the include and lib
+    directories created by a MS-CG install, e.g.
+      % ln -s /usr/local/include includelink
+      % ln -s /usr/local/lib liblink
+
+-----------------
+
+When these steps are complete you can build LAMMPS with the MS-CG
+package installed:
+
+% cd lammps/src
+% make yes-USER-MSCG
+% make g++ (or whatever target you wish)
+
+Note that if you download and unpack a new LAMMPS tarball, the
+"includelink" and "liblink" files will be lost and you will need to
+re-create them (step 4).  If you built MS-CG in this directory (as
+opposed to somewhere else on your system) and did not install it
+somewhere else, you will also need to repeat steps 1,2,3.
+
+The Makefile.lammps file in this directory is there for compatibility
+with the way other libraries under the lib dir are linked with by
+LAMMPS.  MS-CG requires the GSL, LAPACK, and BLAS libraries as listed
+in Makefile.lammps.  If they are not in default locations where your
+LD_LIBRARY_PATH environment settings can find them, then you should
+add the approrpriate -L paths to the mscg_SYSPATH variable in
+Makefile.lammps.
--- a/lib/voronoi/README
+++ b/lib/voronoi/README
@ -6,6 +6,8 @@ The Voro++ library is available at http://math.lbl.gov/voro++ and was
 developed by Chris H. Rycroft while at UC Berkeley / Lawrence Berkeley
 Laboratory.

+-----------------
+
 You must perform the following steps yourself, or you can use the
 install.py Python script to automate any or all steps of the process.
 Type "python install.py" for instructions.
@ -31,11 +33,13 @@ Type "python install.py" for instructions.
    to the Voro++ src directory is.  E.g if you built Voro++ in this dir:
      % ln -s voro++-0.4.6/src includelink
      % ln -s voro++-0.4.6/src liblink
-    Note that these links could also be set to the include and lib
+    These links could instead be set to the include and lib
    directories created by a Voro++ install, e.g.
      % ln -s /usr/local/include includelink
      % ln -s /usr/local/lib liblink

+-----------------
+
 When these steps are complete you can build LAMMPS
 with the VORONOI package installed:

@ -43,14 +47,13 @@ with the VORONOI package installed:
 % make yes-voronoi
 % make g++ (or whatever target you wish)

-Note that if you download and unpack a new LAMMPS tarball,
-the "includelink" and "liblink" files will be lost
-and you will need to re-create them (step 4).  If you
-built Voro++ in this directory (as opposed to somewhere
-else on your system) and did not install it somewhere
-else, you will also need to repeat steps 1,2,3.
+Note that if you download and unpack a new LAMMPS tarball, the
+"includelink" and "liblink" files will be lost and you will need to
+re-create them (step 4).  If you built Voro++ in this directory (as
+opposed to somewhere else on your system) and did not install it
+somewhere else, you will also need to repeat steps 1,2,3.

 The Makefile.lammps file in this directory is there for compatibility
 with the way other libraries under the lib dir are linked with by
-LAMMPS.  However, Voro++ requires no auxiliary files or
-settings, so its variables are blank.
+LAMMPS.  However, Voro++ requires no auxiliary files or settings, so
+its variables are blank.
--- a/src/KSPACE/pppm.cpp
+++ b/src/KSPACE/pppm.cpp
@ -260,8 +260,6 @@ void PPPM::init()
    double theta = force->angle->equilibrium_angle(typeA);
    double blen = force->bond->equilibrium_distance(typeB);
    alpha = qdist / (cos(0.5*theta) * blen);
-    if (domain->triclinic)
-      error->all(FLERR,"Cannot (yet) use PPPM with triclinic box and TIP4P");
  }

  // compute qsum & qsqsum and warn if not charge-neutral
--- a/src/KSPACE/pppm_tip4p.cpp
+++ b/src/KSPACE/pppm_tip4p.cpp
@ -42,7 +42,7 @@ using namespace MathConst;
 PPPMTIP4P::PPPMTIP4P(LAMMPS *lmp, int narg, char **arg) :
  PPPM(lmp, narg, arg)
 {
-  triclinic_support = 0;
+  triclinic_support = 1;
  tip4pflag = 1;
 }

@ -479,9 +479,9 @@ void PPPMTIP4P::fieldforce_peratom()
 }

 /* ----------------------------------------------------------------------
-  find 2 H atoms bonded to O atom i
-  compute position xM of fictitious charge site for O atom
-  also return local indices iH1,iH2 of H atoms
+   find 2 H atoms bonded to O atom i
+   compute position xM of fictitious charge site for O atom
+   also return local indices iH1,iH2 of H atoms
 ------------------------------------------------------------------------- */

 void PPPMTIP4P::find_M(int i, int &iH1, int &iH2, double *xM)
@ -498,6 +498,11 @@ void PPPMTIP4P::find_M(int i, int &iH1, int &iH2, double *xM)
  iH1 = domain->closest_image(i,iH1);
  iH2 = domain->closest_image(i,iH2);

+  if (triclinic) {
+    find_M_triclinic(i,iH1,iH2,xM);
+    return;
+  }
+
  double **x = atom->x;

  double delx1 = x[iH1][0] - x[i][0];
@ -512,3 +517,39 @@ void PPPMTIP4P::find_M(int i, int &iH1, int &iH2, double *xM)
  xM[1] = x[i][1] + alpha * 0.5 * (dely1 + dely2);
  xM[2] = x[i][2] + alpha * 0.5 * (delz1 + delz2);
 }
+
+/* ----------------------------------------------------------------------
+   triclinic version of find_M()
+   calculation of M coords done in real space
+------------------------------------------------------------------------- */
+
+void PPPMTIP4P::find_M_triclinic(int i, int iH1, int iH2, double *xM)
+{
+  double **x = atom->x;
+
+  // convert from lamda to real space
+  // may be possible to do this more efficiently in lamda space
+
+  domain->lamda2x(x[i],x[i]);
+  domain->lamda2x(x[iH1],x[iH1]);
+  domain->lamda2x(x[iH2],x[iH2]);
+
+  double delx1 = x[iH1][0] - x[i][0];
+  double dely1 = x[iH1][1] - x[i][1];
+  double delz1 = x[iH1][2] - x[i][2];
+
+  double delx2 = x[iH2][0] - x[i][0];
+  double dely2 = x[iH2][1] - x[i][1];
+  double delz2 = x[iH2][2] - x[i][2];
+
+  xM[0] = x[i][0] + alpha * 0.5 * (delx1 + delx2);
+  xM[1] = x[i][1] + alpha * 0.5 * (dely1 + dely2);
+  xM[2] = x[i][2] + alpha * 0.5 * (delz1 + delz2);
+
+  // convert back to lamda space
+
+  domain->x2lamda(x[i],x[i]);
+  domain->x2lamda(x[iH1],x[iH1]);
+  domain->x2lamda(x[iH2],x[iH2]);
+  domain->x2lamda(xM,xM);
+}
--- a/src/KSPACE/pppm_tip4p.h
+++ b/src/KSPACE/pppm_tip4p.h
@ -39,6 +39,7 @@ class PPPMTIP4P : public PPPM {

 private:
  void find_M(int, int &, int &, double *);
+  void find_M_triclinic(int, int, int, double *);
 };

 }
--- a/src/MSCG/Install.sh
+++ b/src/MSCG/Install.sh
@ -0,0 +1,63 @@
+# Install/unInstall package files in LAMMPS
+# mode = 0/1/2 for uninstall/install/update
+
+mode=$1
+
+# arg1 = file, arg2 = file it depends on
+
+action () {
+  if (test $mode = 0) then
+    rm -f ../$1
+  elif (! cmp -s $1 ../$1) then
+    if (test -z "$2" || test -e ../$2) then
+      cp $1 ..
+      if (test $mode = 2) then
+        echo "  updating src/$1"
+      fi
+    fi
+  elif (test -n "$2") then
+    if (test ! -e ../$2) then
+      rm -f ../$1
+    fi
+  fi
+}
+
+# all package files with no dependencies
+
+for file in *.cpp *.h; do
+  action $file
+done
+
+# edit 2 Makefile.package files to include/exclude package info
+
+if (test $1 = 1) then
+
+  if (test -e ../Makefile.package) then
+    sed -i -e 's/[^ \t]*mscg[^ \t]* //' ../Makefile.package
+    sed -i -e 's|^PKG_INC =[ \t]*|&-I../../lib/mscg/includelink |' ../Makefile.package
+    sed -i -e 's|^PKG_PATH =[ \t]*|&-L../../lib/mscg/liblink |' ../Makefile.package
+    sed -i -e 's|^PKG_LIB =[ \t]*|&-lmscg |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(mscg_SYSINC) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(mscg_SYSLIB) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(mscg_SYSPATH) |' ../Makefile.package
+  fi
+
+  if (test -e ../Makefile.package.settings) then
+    sed -i -e '/^include.*mscg.*$/d' ../Makefile.package.settings
+    # multiline form needed for BSD sed on Macs
+    sed -i -e '4 i \
+include ..\/..\/lib\/mscg\/Makefile.lammps
+' ../Makefile.package.settings
+  fi
+
+elif (test $1 = 0) then
+
+  if (test -e ../Makefile.package) then
+    sed -i -e 's/[^ \t]*mscg[^ \t]* //' ../Makefile.package
+  fi
+
+  if (test -e ../Makefile.package.settings) then
+    sed -i -e '/^include.*mscg.*$/d' ../Makefile.package.settings
+  fi
+
+fi
--- a/src/MSCG/README
+++ b/src/MSCG/README
@ -0,0 +1,18 @@
+The USER-MSCG package adds a fix mscg command, which carries out
+multi-scale coarse-graining for the parameterization of coarse-grained
+(CG) interactions.
+
+It uses the Multi-Scale Coarse-Graining (MS-CG) library, available at
+https://github.com/uchicago-voth/MSCG-release.  The MSCG library was
+developed by Jacob Wagner in Greg Voth's group at the University of
+Chicago.
+
+The library can be downloaded and built in lib/mscg or elsewhere on
+your system, which must be done before bulding LAMMPS with this
+package.  Details of the download, build, and install process for MSCG
+are given in the lib/mscg/README file.  Also see the LAMMPS manual for
+general information on building LAMMPS with external libraries. The
+settings in the Makefile.lammps file in lib/mscg must be correct for
+LAMMPS to build correctly with this package installed.
+
+There are example scripts for using this package in examples/mscg.
--- a/src/MSCG/fix_mscg.cpp
+++ b/src/MSCG/fix_mscg.cpp
@ -0,0 +1,330 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Lauren Abbott (Sandia)
+------------------------------------------------------------------------- */
+
+#include <mpi.h>
+#include <string.h>
+#include <stdlib.h>
+#include "fix_mscg.h"
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "force.h"
+#include "input.h"
+#include "memory.h"
+#include "modify.h"
+#include "neigh_list.h"
+#include "neighbor.h"
+#include "region.h"
+#include "update.h"
+#include "variable.h"
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+/* ---------------------------------------------------------------------- */
+
+FixMSCG::FixMSCG(LAMMPS *lmp, int narg, char **arg) :
+  Fix(lmp, narg, arg)
+{
+  if (narg < 4) error->all(FLERR,"Illegal fix mscg command");
+  nevery = force->inumeric(FLERR,arg[3]);
+  if (nevery <= 0) error->all(FLERR,"Illegal fix mscg command");
+
+  me = comm->me;
+  nprocs = comm->nprocs;
+  if (nprocs > 1) error->all(FLERR,"Fix mscg does not yet support "
+                             "parallel use via MPI");
+
+  if (sizeof(tagint) != sizeof(smallint))
+    error->all(FLERR,"Fix mscg must be used with 32-bit atom IDs");
+
+  // initialize
+
+  int natoms = atom->natoms;
+  int ntypes = atom->ntypes;
+
+  max_partners_bond = 4;
+  max_partners_angle = 12;
+  max_partners_dihedral = 36;
+  nframes = n_frames = block_size = 0;
+  range_flag = 0;
+  name_flag = 0;
+  f = NULL;
+
+  type_names = new char*[natoms];
+  for (int i = 0; i < natoms; i++) type_names[i] = new char[24];
+
+  // parse remaining arguments
+
+  int iarg = 4;
+  while(iarg < narg) {
+    if (strcmp(arg[iarg],"range") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal fix mscg command");
+      if (strcmp(arg[iarg+1],"on") == 0)
+        range_flag = 1;
+      else if (strcmp(arg[iarg+1],"off") == 0)
+        range_flag = 0;
+      else error->all(FLERR,"Illegal fix mscg command");
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"name") == 0) {
+      if (iarg+ntypes+1 > narg)
+        error->all(FLERR,"Illegal fix mscg command");
+      name_flag = 1;
+      for (int i = 0; i < ntypes; i++) {
+        iarg += 1;
+        std::string str = arg[iarg];
+        type_names[i] = strcat(strdup(str.c_str()),"\0");
+      }
+      iarg += 1;
+    } else if (strcmp(arg[iarg],"max") == 0) {
+      if (iarg+4 > narg) error->all(FLERR,"Illegal fix mscg command");
+      max_partners_bond = atoi(arg[iarg+1]);
+      max_partners_angle = atoi(arg[iarg+2]);
+      max_partners_dihedral = atoi(arg[iarg+3]);
+      iarg += 4;
+    } else error->all(FLERR,"Illegal fix mscg command");
+  }
+
+  if (name_flag == 0) {
+    for (int i = 0; i < natoms; i++) {
+      std::string str = std::to_string(i+1);
+      type_names[i] = strcat(strdup(str.c_str()),"\0");
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixMSCG::~FixMSCG()
+{
+  memory->destroy(f);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixMSCG::setmask()
+{
+  int mask = 0;
+  mask |= END_OF_STEP;
+  return mask;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixMSCG::post_constructor()
+{
+  if (domain->triclinic == 1)
+    error->all(FLERR,"Fix mscg does not yet support triclinic geometries");
+
+  // topology information
+  // sort by atom id to send to mscg lib
+
+  int natoms = atom->natoms;
+  int nlocal = atom->nlocal;
+  tagint *tag = atom->tag;
+  int *type = atom->type;
+  int *num_bond = atom->num_bond;
+  int **bond_atom = atom->bond_atom;
+  int *num_angle = atom->num_angle;
+  int **angle_atom1 = atom->angle_atom1;
+  int **angle_atom3 = atom->angle_atom3;
+  int *num_dihedral = atom->num_dihedral;
+  int **dihedral_atom1 = atom->dihedral_atom1;
+  int **dihedral_atom3 = atom->dihedral_atom3;
+  int **dihedral_atom4 = atom->dihedral_atom4;
+  double *prd_half = domain->prd_half;
+  int i,ii,j,jj,jnum,k,l;
+
+  n_cg_sites = natoms;
+  n_cg_types = atom->ntypes;
+
+  memory->grow(f,nlocal,3,"fix_mscg:f");
+  f1d = new double[n_cg_sites*3]();
+  x1d = new double[n_cg_sites*3]();
+  cg_site_types = new int[n_cg_sites]();
+  n_partners_bond = new unsigned[n_cg_sites]();
+  n_partners_angle = new unsigned[n_cg_sites]();
+  n_partners_dihedral = new unsigned[n_cg_sites]();
+  partners_bond = new unsigned*[n_cg_sites];
+  for (i = 0; i < n_cg_sites; i++)
+    partners_bond[i] = new unsigned[1*max_partners_bond]();
+  partners_angle = new unsigned*[n_cg_sites];
+  for (i = 0; i < n_cg_sites; i++)
+    partners_angle[i] = new unsigned[2*max_partners_angle]();
+  partners_dihedral = new unsigned*[n_cg_sites];
+  for (i = 0; i < n_cg_sites; i++)
+    partners_dihedral[i] = new unsigned[3*max_partners_dihedral]();
+
+  for (i = 0; i < 3; i++)
+    box_half_lengths[i] = prd_half[i];
+
+  for (i = 0; i < nlocal; i++) {
+    cg_site_types[i] = 0;
+    n_partners_bond[i] = 0;
+    n_partners_angle[i] = 0;
+    n_partners_dihedral[i] = 0;
+  }
+
+  for (ii = 0; ii < nlocal; ii++) {
+    i = tag[ii];
+    cg_site_types[i-1] = type[ii];
+
+    jnum = num_bond[ii];
+    for (jj = 0; jj < jnum; jj++) {
+      j = bond_atom[ii][jj];
+      if (n_partners_bond[i-1] >= max_partners_bond ||
+          n_partners_bond[j-1] >= max_partners_bond)
+        error->all(FLERR,"Bond list overflow, boost fix_mscg max");
+      partners_bond[i-1][n_partners_bond[i-1]] = j-1;
+      partners_bond[j-1][n_partners_bond[j-1]] = i-1;
+      n_partners_bond[i-1]++;
+      n_partners_bond[j-1]++;
+    }
+
+    jnum = num_angle[ii];
+    for (jj = 0; jj < jnum; jj++) {
+      j = angle_atom1[ii][jj];
+      k = angle_atom3[ii][jj];
+      if (n_partners_angle[j-1] >= max_partners_angle ||
+          n_partners_angle[k-1] >= max_partners_angle)
+        error->all(FLERR,"Angle list overflow, boost fix_mscg max");
+      partners_angle[j-1][n_partners_angle[j-1]*2] = i-1;
+      partners_angle[j-1][n_partners_angle[j-1]*2+1] = k-1;
+      partners_angle[k-1][n_partners_angle[k-1]*2] = i-1;
+      partners_angle[k-1][n_partners_angle[k-1]*2+1] = j-1;
+      n_partners_angle[j-1]++;
+      n_partners_angle[k-1]++;
+    }
+
+    jnum = num_dihedral[ii];
+    for (jj = 0; jj < jnum; jj++) {
+      j = dihedral_atom1[ii][jj];
+      k = dihedral_atom3[ii][jj];
+      l = dihedral_atom4[ii][jj];
+      if (n_partners_dihedral[j-1] >= max_partners_dihedral ||
+          n_partners_dihedral[l-1] >= max_partners_dihedral)
+        error->all(FLERR,"Dihedral list overflow, boost fix_mscg max");
+      partners_dihedral[j-1][n_partners_dihedral[j-1]*3] = i-1;
+      partners_dihedral[j-1][n_partners_dihedral[j-1]*3+1] = k-1;
+      partners_dihedral[j-1][n_partners_dihedral[j-1]*3+2] = l-1;
+      partners_dihedral[l-1][n_partners_dihedral[l-1]*3] = k-1;
+      partners_dihedral[l-1][n_partners_dihedral[l-1]*3+1] = i-1;
+      partners_dihedral[l-1][n_partners_dihedral[l-1]*3+2] = j-1;
+      n_partners_dihedral[j-1]++;
+      n_partners_dihedral[l-1]++;
+    }
+  }
+
+  // pass topology data to mscg code and run startup
+
+  fprintf(screen,"Initializing MSCG with topology data ...\n");
+  if (range_flag)
+    mscg_struct = rangefinder_startup_part1(mscg_struct);
+  else
+    mscg_struct = mscg_startup_part1(mscg_struct);
+
+  n_frames = get_n_frames(mscg_struct);
+  block_size = get_block_size(mscg_struct);
+
+  mscg_struct =
+    setup_topology_and_frame(mscg_struct,n_cg_sites,n_cg_types,type_names,
+                             cg_site_types,box_half_lengths);
+  mscg_struct =
+    set_bond_topology(mscg_struct,partners_bond,n_partners_bond);
+  mscg_struct =
+    set_angle_topology(mscg_struct,partners_angle,n_partners_angle);
+  mscg_struct =
+    set_dihedral_topology(mscg_struct,partners_dihedral,n_partners_dihedral);
+  mscg_struct =
+    generate_exclusion_topology(mscg_struct);
+
+  if (range_flag)
+    mscg_struct = rangefinder_startup_part2(mscg_struct);
+  else
+    mscg_struct = mscg_startup_part2(mscg_struct);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixMSCG::init()
+{
+  int nlocal = atom->nlocal;
+  double **force = atom->f;
+  int i;
+
+  // forces are reset to 0 before pre_force, saved here
+  // init called for each frame of dump in rerun command
+
+  for (i = 0; i < nlocal; i++) {
+    f[i][0] = force[i][0];
+    f[i][1] = force[i][1];
+    f[i][2] = force[i][2];
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixMSCG::end_of_step()
+{
+  if (domain->triclinic == 1)
+    error->all(FLERR,"Fix mscg does not yet support triclinic geometries");
+
+  int natoms = atom->natoms;
+  int nlocal = atom->nlocal;
+  tagint *tag = atom->tag;
+  double **x = atom->x;
+  double *prd_half = domain->prd_half;
+  int i,ii,j;
+
+  // trajectory information
+
+  for (ii = 0; ii < nlocal; ii++) {
+    i = tag[ii];
+    for (j = 0; j < 3; j++) {
+      x1d[(i-1)*3+j] = x[ii][j];
+      f1d[(i-1)*3+j] = f[ii][j];
+    }
+  }
+
+  // pass x,f to mscg to update matrix
+
+  nframes++;
+  if (range_flag)
+    mscg_struct = rangefinder_process_frame(mscg_struct,x1d,f1d);
+  else
+    mscg_struct = mscg_process_frame(mscg_struct,x1d,f1d);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixMSCG::post_run()
+{
+  // call mscg to solve matrix and generate output
+
+  fprintf(screen,"Finalizing MSCG ...\n");
+
+  if (nframes != n_frames)
+    error->warning(FLERR,"Fix mscg n_frames is inconsistent with control.in");
+  if (nframes % block_size != 0)
+    error->warning(FLERR,"Fix mscg n_frames is not divisible by "
+                   "block_size in control.in");
+
+  if (range_flag)
+    rangefinder_solve_and_output(mscg_struct);
+  else
+    mscg_solve_and_output(mscg_struct);
+}
--- a/src/MSCG/fix_mscg.h
+++ b/src/MSCG/fix_mscg.h
@ -0,0 +1,91 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(mscg,FixMSCG)
+
+#else
+
+#ifndef LMP_FIX_MSCG_H
+#define LMP_FIX_MSCG_H
+
+#include "fix.h"
+#include "mscg.h"
+
+namespace LAMMPS_NS {
+
+class FixMSCG : public Fix {
+ public:
+  FixMSCG(class LAMMPS *, int, char **);
+  ~FixMSCG();
+  int setmask();
+  void post_constructor();
+  void init();
+  void end_of_step();
+  void post_run();
+
+ private:
+  int range_flag,name_flag,me,nprocs;
+  int nframes,n_frames,block_size,n_cg_sites,n_cg_types,*cg_site_types;
+  int max_partners_bond,max_partners_angle,max_partners_dihedral;
+  unsigned *n_partners_bond,*n_partners_angle,*n_partners_dihedral;
+  unsigned **partners_bond,**partners_angle,**partners_dihedral;
+  double *x1d,*f1d,**f;
+  double box_half_lengths[3];
+  char **type_names;
+  void *mscg_struct;
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: Illegal ... command
+
+Self-explanatory.  Check the input script syntax and compare to the
+documentation for the command.  You can use -echo screen as a
+command-line option when running LAMMPS to see the offending line.
+
+E: Fix mscg does not yet support mpi
+
+Self-explanatory.
+
+E: Fix mscg does not yet support triclinic geometries
+
+Self-explanatory.
+
+E: Bond/Angle/Dihedral list overflow, boost fix_mscg max
+
+A site has more bond/angle/dihedral partners that the maximum and
+has overflowed the bond/angle/dihedral partners list. Increase the
+corresponding fix_mscg max arg.
+
+W: Fix mscg n_frames is inconsistent with control.in
+
+The control.in file read by the MSCG lib has a parameter n_frames
+that should be equal to the number of frames processed by the
+fix mscg command. If not equal, the fix will still run, but the
+calculated residuals may be normalized incorrectly.
+
+W: Fix mscg n_frames is not divisible by block_size in control.in
+
+The control.in file read by the MSCG lib has a parameter block_size
+that should be a divisor of the number of frames processed by the
+fix mscg command. If not, the fix will still run, but some frames may
+not be included in the MSCG calculations.
+
+*/
--- a/src/Makefile
+++ b/src/Makefile
@ -44,8 +44,8 @@ endif
 # Package variables

 PACKAGE = asphere body class2 colloid compress coreshell dipole gpu \
-	  granular kim \
-	  kokkos kspace manybody mc meam misc molecule mpiio opt peri poems \
+	  granular kim kokkos kspace manybody mc meam misc molecule \
+	  mpiio mscg opt peri poems \
 	  python qeq reax replica rigid shock snap srd voronoi

 PACKUSER = user-atc user-awpmd user-cg-cmm user-colvars \
@ -55,7 +55,7 @@ PACKUSER = user-atc user-awpmd user-cg-cmm user-colvars \
 	   user-quip user-reaxc user-smd user-smtbq user-sph user-tally \
 	   user-vtk

-PACKLIB = compress gpu kim kokkos meam mpiio poems python reax voronoi \
+PACKLIB = compress gpu kim kokkos meam mpiio mscg poems python reax voronoi \
 	  user-atc user-awpmd user-colvars user-h5md user-lb user-molfile \
 	  user-nc-dump user-qmmm user-quip user-smd user-vtk

--- a/src/USER-INTEL/TEST/in.intel.eam
+++ b/src/USER-INTEL/TEST/in.intel.eam
@ -0,0 +1,46 @@
+# bulk Cu lattice
+variable	w index 10      # Warmup Timesteps
+variable	t index 3100    # Main Run Timesteps
+variable	m index 1       # Main Run Timestep Multiplier
+variable	n index 0       # Use NUMA Mapping for Multi-Node
+variable	b index 3       # Neighbor binsize
+variable	p index 0       # Use Power Measurement
+
+variable	x index 4
+variable	y index 2
+variable	z index 2
+
+variable	rr equal floor($t*$m)
+variable	root getenv LMP_ROOT
+
+if "$n > 0"	then "processors * * * grid numa"
+
+variable	xx equal 20*$x
+variable	yy equal 20*$y
+variable	zz equal 20*$z
+
+units		metal
+atom_style	atomic
+
+lattice		fcc 3.615
+region		box block 0 ${xx} 0 ${yy} 0 ${zz}
+create_box	1 box
+create_atoms	1 box
+
+pair_style	eam
+pair_coeff	1 1 ${root}/bench/Cu_u3.eam
+
+velocity	all create 1600.0 376847 loop geom
+
+neighbor	1.0 bin
+neigh_modify    every 1 delay 5 check yes
+
+fix		1 all nve
+
+timestep	0.005
+thermo		50
+
+if "$p > 0"	then "run_style verlet/power"
+
+if "$w > 0"	then "run $w"
+run		${rr}
--- a/src/USER-INTEL/pair_eam_intel.cpp
+++ b/src/USER-INTEL/pair_eam_intel.cpp
@ -0,0 +1,788 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: W. Michael Brown (Intel)
+------------------------------------------------------------------------- */
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "pair_eam_intel.h"
+#include "atom.h"
+#include "force.h"
+#include "comm.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "neigh_request.h"
+#include "memory.h"
+#include "error.h"
+#include "suffix.h"
+
+using namespace LAMMPS_NS;
+
+#define MAXLINE 1024
+
+#define FC_PACKED1_T typename ForceConst<flt_t>::fc_packed1
+#define FC_PACKED2_T typename ForceConst<flt_t>::fc_packed2
+
+/* ---------------------------------------------------------------------- */
+
+PairEAMIntel::PairEAMIntel(LAMMPS *lmp) : PairEAM(lmp)
+{
+  suffix_flag |= Suffix::INTEL;
+  fp_float = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairEAMIntel::~PairEAMIntel()
+{
+  memory->destroy(fp_float);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairEAMIntel::compute(int eflag, int vflag)
+{
+  if (fix->precision() == FixIntel::PREC_MODE_MIXED)
+    compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
+                          force_const_single);
+  else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
+    compute<double,double>(eflag, vflag, fix->get_double_buffers(),
+                           force_const_double);
+  else
+    compute<float,float>(eflag, vflag, fix->get_single_buffers(),
+                         force_const_single);
+
+  fix->balance_stamp();
+  vflag_fdotr = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class flt_t, class acc_t>
+void PairEAMIntel::compute(int eflag, int vflag,
+			   IntelBuffers<flt_t,acc_t> *buffers,
+			   const ForceConst<flt_t> &fc)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag, vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int inum = list->inum;
+  const int nthreads = comm->nthreads;
+  const int host_start = fix->host_start_pair();
+  const int offload_end = fix->offload_end_pair();
+  const int ago = neighbor->ago;
+
+  if (ago != 0 && fix->separate_buffers() == 0) {
+    fix->start_watch(TIME_PACK);
+
+    #if defined(_OPENMP)
+    #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
+    #endif
+    {
+      int ifrom, ito, tid;
+      IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost,
+                                nthreads, sizeof(ATOM_T));
+      buffers->thr_pack(ifrom,ito,ago);
+    }
+    fix->stop_watch(TIME_PACK);
+  }
+
+  if (_onetype) {
+    if (evflag || vflag_fdotr) {
+      int ovflag = 0;
+      if (vflag_fdotr) ovflag = 2;
+      else if (vflag) ovflag = 1;
+      if (eflag) {
+        if (force->newton_pair) {
+          eval<1,1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
+	  eval<1,1,1,1>(0, ovflag, buffers, fc, host_start, inum);
+        } else {
+          eval<1,1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
+	  eval<1,1,1,0>(0, ovflag, buffers, fc, host_start, inum);
+        }
+      } else {
+        if (force->newton_pair) {
+          eval<1,1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
+	  eval<1,1,0,1>(0, ovflag, buffers, fc, host_start, inum);
+        } else {
+          eval<1,1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
+	  eval<1,1,0,0>(0, ovflag, buffers, fc, host_start, inum);
+        }
+      }
+    } else {
+      if (force->newton_pair) {
+        eval<0,0,0,1>(1, 0, buffers, fc, 0, offload_end);
+        eval<0,0,0,1>(0, 0, buffers, fc, host_start, inum);
+      } else {
+        eval<0,0,0,0>(1, 0, buffers, fc, 0, offload_end);
+        eval<0,0,0,0>(0, 0, buffers, fc, host_start, inum);
+      }
+    }
+  } else {
+    if (evflag || vflag_fdotr) {
+      int ovflag = 0;
+      if (vflag_fdotr) ovflag = 2;
+      else if (vflag) ovflag = 1;
+      if (eflag) {
+        if (force->newton_pair) {
+          eval<0,1,1,1>(1, ovflag, buffers, fc, 0, offload_end);
+	  eval<0,1,1,1>(0, ovflag, buffers, fc, host_start, inum);
+        } else {
+          eval<0,1,1,0>(1, ovflag, buffers, fc, 0, offload_end);
+	  eval<0,1,1,0>(0, ovflag, buffers, fc, host_start, inum);
+        }
+      } else {
+        if (force->newton_pair) {
+          eval<0,1,0,1>(1, ovflag, buffers, fc, 0, offload_end);
+	  eval<0,1,0,1>(0, ovflag, buffers, fc, host_start, inum);
+        } else {
+          eval<0,1,0,0>(1, ovflag, buffers, fc, 0, offload_end);
+	  eval<0,1,0,0>(0, ovflag, buffers, fc, host_start, inum);
+        }
+      }
+    } else {
+      if (force->newton_pair) {
+        eval<0,0,0,1>(1, 0, buffers, fc, 0, offload_end);
+        eval<0,0,0,1>(0, 0, buffers, fc, host_start, inum);
+      } else {
+        eval<0,0,0,0>(1, 0, buffers, fc, 0, offload_end);
+        eval<0,0,0,0>(0, 0, buffers, fc, host_start, inum);
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <int ONETYPE, int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, 
+	  class acc_t>
+void PairEAMIntel::eval(const int offload, const int vflag,
+			IntelBuffers<flt_t,acc_t> *buffers,
+			const ForceConst<flt_t> &fc,
+                        const int astart, const int aend)
+{
+  const int inum = aend - astart;
+  if (inum == 0) return;
+
+  flt_t *fp_f;
+  if (atom->nmax > nmax) {
+    memory->destroy(rho);
+    memory->destroy(fp);
+    nmax = atom->nmax;
+    int edge = (nmax * sizeof(acc_t)) % INTEL_DATA_ALIGN;
+    if (edge) nmax += (INTEL_DATA_ALIGN - edge) / sizeof(acc_t);
+    memory->create(rho,nmax*comm->nthreads,"pair:rho");
+    memory->create(fp,nmax,"pair:fp");
+    // Use single precision allocation for single/mixed mode
+    // Keep double version for single and swap_eam
+    if (sizeof(flt_t)==sizeof(float)) {
+      memory->destroy(fp_float);
+      memory->create(fp_float,nmax,"pair::fp_float");
+    }
+  }
+  if (sizeof(flt_t)==sizeof(float))
+    fp_f = (flt_t *)fp_float;
+  else
+    fp_f = (flt_t *)fp;
+
+
+  int nlocal, nall, minlocal;
+  fix->get_buffern(offload, nlocal, nall, minlocal);
+
+  const int ago = neighbor->ago;
+  IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
+
+  ATOM_T * _noalias const x = buffers->get_x(offload);
+
+  const int * _noalias const numneigh = list->numneigh;
+  const int * _noalias const cnumneigh = buffers->cnumneigh(list);
+  const int * _noalias const firstneigh = buffers->firstneigh(list);
+  const FC_PACKED1_T * _noalias const rhor_spline_f = fc.rhor_spline_f;
+  const FC_PACKED1_T * _noalias const rhor_spline_e = fc.rhor_spline_e;
+  const FC_PACKED2_T * _noalias const z2r_spline_t = fc.z2r_spline_t;
+  const FC_PACKED1_T * _noalias const frho_spline_f = fc.frho_spline_f;
+  const FC_PACKED1_T * _noalias const frho_spline_e = fc.frho_spline_e;
+  const flt_t * _noalias const scale_f = fc.scale_f[0];
+
+  const int ntypes = atom->ntypes + 1;
+  const int eatom = this->eflag_atom;
+
+  // Determine how much data to transfer
+  int x_size, q_size, f_stride, ev_size, separate_flag;
+  IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag,
+                       buffers, offload, fix, separate_flag,
+                       x_size, q_size, ev_size, f_stride);
+
+  int tc;
+  FORCE_T * _noalias f_start;
+  acc_t * _noalias ev_global;
+  IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
+  const int nthreads = tc;
+  int *overflow = fix->get_off_overflow_flag();
+
+  const flt_t frdr = rdr;
+  const flt_t frdrho = rdrho;
+  const flt_t frhomax = rhomax;
+  const flt_t fcutforcesq = cutforcesq;
+  const int istride = fc.rhor_istride();
+  const int jstride = fc.rhor_jstride();
+  const int fstride = fc.frho_stride();
+
+  {
+    #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
+    *timer_compute = MIC_Wtime();
+    #endif
+
+    IP_PRE_repack_for_offload(NEWTON_PAIR, separate_flag, nlocal, nall,
+                              f_stride, x, 0);
+
+    acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
+    if (EVFLAG) {
+      oevdwl = (acc_t)0;
+      if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
+    }
+
+    // loop over neighbors of my atoms
+    #if defined(_OPENMP)
+    #pragma omp parallel default(none) \
+      shared(fp_f, f_start,f_stride,nlocal,nall,minlocal)	\
+      reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5)
+    #endif
+    {
+      int iifrom, iito, tid;
+      IP_PRE_omp_range_id_vec(iifrom, iito, tid, inum, nthreads, 
+			      INTEL_VECTOR_WIDTH);
+      iifrom += astart;
+      iito += astart;
+
+      FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
+      double * _noalias const trho = rho + tid*nmax;
+      if (NEWTON_PAIR)
+	memset(trho, 0, nall * sizeof(double));
+      else
+	memset(trho, 0, nlocal * sizeof(double));
+
+      flt_t oscale;
+      int rhor_joff, frho_ioff;
+      if (ONETYPE) {
+        const int ptr_off=_onetype * ntypes + _onetype;
+	oscale = scale_f[ptr_off];
+	int rhor_ioff = istride * _onetype;
+	rhor_joff = rhor_ioff + _onetype * jstride;
+	frho_ioff = fstride * _onetype;
+      }
+      for (int i = iifrom; i < iito; ++i) {
+        int itype, rhor_ioff;
+	if (!ONETYPE) {
+          itype = x[i].w;
+	  rhor_ioff = istride * itype;
+	}
+	const int * _noalias const jlist = firstneigh + cnumneigh[i];
+	const int jnum = numneigh[i];
+
+	const flt_t xtmp = x[i].x;
+	const flt_t ytmp = x[i].y;
+	const flt_t ztmp = x[i].z;
+
+	acc_t rhoi = (acc_t)0.0;
+	#if defined(LMP_SIMD_COMPILER)
+	#pragma vector aligned
+        #pragma simd reduction(+:rhoi)
+	#endif
+	for (int jj = 0; jj < jnum; jj++) {
+          int j, jtype;
+	  j = jlist[jj] & NEIGHMASK;
+
+          const flt_t delx = xtmp - x[j].x;
+          const flt_t dely = ytmp - x[j].y;
+          const flt_t delz = ztmp - x[j].z;
+	  const flt_t rsq = delx*delx + dely*dely + delz*delz;
+
+	  if (rsq < fcutforcesq) {
+	    if (!ONETYPE) jtype = x[j].w;
+	    flt_t p = sqrt(rsq)*frdr + (flt_t)1.0;
+	    int m = static_cast<int> (p);
+	    m = MIN(m,nr-1);
+	    p -= m;
+	    p = MIN(p,(flt_t)1.0);
+	    if (!ONETYPE)
+	      rhor_joff = rhor_ioff + jtype * jstride;
+	    const int joff = rhor_joff + m;
+	    flt_t ra;
+	    ra = ((rhor_spline_e[joff].a*p + rhor_spline_e[joff].b) * p +
+		  rhor_spline_e[joff].c) * p + rhor_spline_e[joff].d;
+	    rhoi += ra;
+	    if (NEWTON_PAIR || j < nlocal) {
+	      if (!ONETYPE) {
+		const int ioff = jtype * istride + itype * jstride + m;
+		ra = ((rhor_spline_e[ioff].a*p + rhor_spline_e[ioff].b)*p +
+                      rhor_spline_e[ioff].c) * p + rhor_spline_e[ioff].d;
+	      }
+	      trho[j] += ra;
+            }
+          }
+        } // for jj
+	trho[i] += rhoi;
+      } // for i
+
+      #if defined(_OPENMP)
+      if (nthreads > 1) {
+        #pragma omp barrier
+        if (tid == 0) {  
+          int rcount;
+	  if (NEWTON_PAIR) rcount = nall;
+	  else rcount = nlocal;
+	  if (nthreads == 2) {
+            double *trho2 = rho + nmax;
+	    #pragma vector aligned
+            #pragma simd
+	    for (int n = 0; n < rcount; n++)
+	      rho[n] += trho2[n];
+          } else if (nthreads == 4) {
+            double *trho2 = rho + nmax;
+	    double *trho3 = trho2 + nmax;
+	    double *trho4 = trho3 + nmax;
+	    #pragma vector aligned
+	    #pragma simd
+	    for (int n = 0; n < rcount; n++)
+	      rho[n] += trho2[n] + trho3[n] + trho4[n];
+          } else {
+	    double *trhon = rho + nmax;
+	    for (int t = 1; t < nthreads; t++) {
+  	      #pragma vector aligned
+	      #pragma simd
+	      for (int n = 0; n < rcount; n++)
+	        rho[n] += trhon[n];
+	      trhon += nmax;
+            }
+          }
+        }
+      }
+      #endif
+
+      // communicate and sum densities
+
+      if (NEWTON_PAIR) {
+        if (tid == 0)
+          comm->reverse_comm_pair(this);
+      }
+      #if defined(_OPENMP)
+      #pragma omp barrier
+      #endif
+
+      // fp = derivative of embedding energy at each atom
+      // phi = embedding energy at each atom
+      // if rho > rhomax (e.g. due to close approach of two atoms),
+      //   will exceed table, so add linear term to conserve energy
+
+      acc_t tevdwl;
+      if (EFLAG) tevdwl = (acc_t)0.0;
+
+      #if defined(LMP_SIMD_COMPILER)
+      #pragma vector aligned
+      #pragma simd reduction(+:tevdwl)
+      #endif
+      for (int i = iifrom; i < iito; ++i) {
+	int itype;
+	if (!ONETYPE) itype = x[i].w;
+	flt_t p = rho[i]*frdrho + (flt_t)1.0;
+	int m = static_cast<int> (p);
+	m = MAX(1,MIN(m,nrho-1));
+	p -= m;
+	p = MIN(p,(flt_t)1.0);
+	if (!ONETYPE) frho_ioff = itype * fstride;
+	const int ioff = frho_ioff + m;
+	fp_f[i] = (frho_spline_f[ioff].a*p + frho_spline_f[ioff].b)*p + 
+	  frho_spline_f[ioff].c;
+	if (EFLAG) {
+	  flt_t phi = ((frho_spline_e[ioff].a*p + frho_spline_e[ioff].b)*p + 
+		       frho_spline_e[ioff].c)*p + frho_spline_e[ioff].d;
+	  if (rho[i] > frhomax) phi += fp_f[i] * (rho[i]-frhomax);
+	  if (!ONETYPE) {
+	    const int ptr_off=itype*ntypes + itype;
+	    oscale = scale_f[ptr_off];
+	  }
+	  phi *= oscale;
+	  tevdwl += phi;
+	  if (eatom) f[i].w += phi;
+	}
+      }
+      if (EFLAG) oevdwl += tevdwl;
+      
+
+      // communicate derivative of embedding function
+
+      #if defined(_OPENMP)
+      #pragma omp barrier
+      #endif
+
+      if (tid == 0) {
+        comm->forward_comm_pair(this);
+	memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
+      } else
+	memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
+
+      #if defined(_OPENMP)
+      #pragma omp barrier
+      #endif
+
+      // compute forces on each atom
+      // loop over neighbors of my atoms
+
+      for (int i = iifrom; i < iito; ++i) {
+        int itype, rhor_ioff;
+	const flt_t * _noalias scale_fi;
+	if (!ONETYPE) {
+	  itype = x[i].w;
+	  rhor_ioff = istride * itype;
+	  scale_fi = scale_f + itype*ntypes;
+	}
+	const int * _noalias const jlist = firstneigh + cnumneigh[i];
+	const int jnum = numneigh[i];
+
+	acc_t fxtmp, fytmp, fztmp, fwtmp;
+	acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5;
+
+	const flt_t xtmp = x[i].x;
+	const flt_t ytmp = x[i].y;
+	const flt_t ztmp = x[i].z;
+	fxtmp = fytmp = fztmp = (acc_t)0;
+	if (EVFLAG) {
+          if (EFLAG) fwtmp = sevdwl = (acc_t)0;
+          if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
+        }
+
+	#if defined(LMP_SIMD_COMPILER)
+	#pragma vector aligned
+	#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl,	\
+	                         sv0, sv1, sv2, sv3, sv4, sv5)
+	#endif
+	for (int jj = 0; jj < jnum; jj++) {
+          int j, jtype;
+	  j = jlist[jj] & NEIGHMASK;
+
+          const flt_t delx = xtmp - x[j].x;
+          const flt_t dely = ytmp - x[j].y;
+          const flt_t delz = ztmp - x[j].z;
+	  const flt_t rsq = delx*delx + dely*dely + delz*delz;
+
+
+	  if (rsq < fcutforcesq) {
+	    if (!ONETYPE) jtype = x[j].w;
+	    const flt_t r = sqrt(rsq);
+	    flt_t p = r*frdr + (flt_t)1.0;
+	    int m = static_cast<int> (p);
+	    m = MIN(m,nr-1);
+	    p -= m;
+	    p = MIN(p,(flt_t)1.0);
+	    if (!ONETYPE)
+	      rhor_joff = rhor_ioff + jtype * jstride;
+	    const int joff = rhor_joff + m;
+	    const flt_t rhojp = (rhor_spline_f[joff].a*p + 
+                                 rhor_spline_f[joff].b)*p + 
+	                        rhor_spline_f[joff].c;
+	    flt_t rhoip;
+	    if (!ONETYPE) {
+	      const int ioff = jtype * istride + itype * jstride + m;
+	      rhoip = (rhor_spline_f[ioff].a*p + rhor_spline_f[ioff].b)*p + 
+		      rhor_spline_f[ioff].c;
+	    } else
+	      rhoip = rhojp;
+	    const flt_t z2p = (z2r_spline_t[joff].a*p + 
+                               z2r_spline_t[joff].b)*p + 
+                              z2r_spline_t[joff].c;
+	    const flt_t z2 = ((z2r_spline_t[joff].d*p + 
+			       z2r_spline_t[joff].e)*p + 
+			      z2r_spline_t[joff].f)*p + 
+	                     z2r_spline_t[joff].g;
+
+	    const flt_t recip = (flt_t)1.0/r;
+	    const flt_t phi = z2*recip;
+	    const flt_t phip = z2p*recip - phi*recip;
+	    const flt_t psip = fp_f[i]*rhojp + fp_f[j]*rhoip + phip;
+	    if (!ONETYPE)
+	      oscale = scale_fi[jtype];
+	    const flt_t fpair = -oscale*psip*recip;
+
+	    fxtmp += delx*fpair;
+	    fytmp += dely*fpair;
+	    fztmp += delz*fpair;
+	    if (NEWTON_PAIR || j < nlocal) {
+              f[j].x -= delx*fpair;
+	      f[j].y -= dely*fpair;
+	      f[j].z -= delz*fpair;
+            }
+
+	    if (EVFLAG) {
+              flt_t ev_pre = (flt_t)0;
+              if (NEWTON_PAIR || i<nlocal)
+	        ev_pre += (flt_t)0.5;
+              if (NEWTON_PAIR || j<nlocal)
+	        ev_pre += (flt_t)0.5;
+
+	      if (EFLAG) {
+                const flt_t evdwl = oscale*phi;
+                sevdwl += ev_pre * evdwl;
+		if (eatom) {
+                  if (NEWTON_PAIR || i < nlocal)
+		    fwtmp += (flt_t)0.5 * evdwl;
+		  if (NEWTON_PAIR || j < nlocal)
+		    f[j].w += (flt_t)0.5 * evdwl;
+                }
+              }
+              IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
+	                           delx, dely, delz);
+            }
+          } // if rsq
+        } // for jj
+        f[i].x += fxtmp;
+        f[i].y += fytmp;
+        f[i].z += fztmp;
+
+        IP_PRE_ev_tally_atom(EVFLAG, EFLAG, vflag, f, fwtmp);
+      } // for i
+
+      if (vflag == 2) {
+        #if defined(_OPENMP)
+        #pragma omp barrier
+        #endif
+        IP_PRE_fdotr_acc_force(NEWTON_PAIR, EVFLAG,  EFLAG, vflag, eatom, nall,
+			       nlocal, minlocal, nthreads, f_start, f_stride,
+			       x, offload);
+      }
+
+    } /// omp
+    if (EVFLAG) {
+      if (EFLAG) {
+        ev_global[0] = oevdwl;
+        ev_global[1] = (acc_t)0.0;
+      }
+      if (vflag) {
+        ev_global[2] = ov0;
+        ev_global[3] = ov1;
+        ev_global[4] = ov2;
+        ev_global[5] = ov3;
+        ev_global[6] = ov4;
+        ev_global[7] = ov5;
+      }
+    }
+    #if defined(__MIC__) && defined(_LMP_INTEL_OFFLOAD)
+    *timer_compute = MIC_Wtime() - *timer_compute;
+    #endif
+  }
+
+  if (offload)
+    fix->stop_watch(TIME_OFFLOAD_LATENCY);
+  else
+    fix->stop_watch(TIME_HOST_PAIR);
+
+  if (EVFLAG)
+    fix->add_result_array(f_start, ev_global, offload, eatom, 0, vflag);
+  else
+    fix->add_result_array(f_start, 0, offload);
+}
+
+/* ----------------------------------------------------------------------
+   init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairEAMIntel::init_style()
+{
+  PairEAM::init_style();
+  neighbor->requests[neighbor->nrequest-1]->intel = 1;
+
+  int ifix = modify->find_fix("package_intel");
+  if (ifix < 0)
+    error->all(FLERR,
+               "The 'package intel' command is required for /intel styles");
+  fix = static_cast<FixIntel *>(modify->fix[ifix]);
+
+  fix->pair_init_check();
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (fix->offload_balance() != 0.0)
+    error->all(FLERR,
+      "Offload for eam/intel is not yet available. Set balance to 0.");
+  #endif
+
+  if (fix->precision() == FixIntel::PREC_MODE_MIXED)
+    pack_force_const(force_const_single, fix->get_mixed_buffers());
+  else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
+    pack_force_const(force_const_double, fix->get_double_buffers());
+  else
+    pack_force_const(force_const_single, fix->get_single_buffers());
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class flt_t, class acc_t>
+void PairEAMIntel::pack_force_const(ForceConst<flt_t> &fc,
+				    IntelBuffers<flt_t,acc_t> *buffers)
+{
+  int tp1 = atom->ntypes + 1;
+  fc.set_ntypes(tp1,nr,nrho,memory,_cop);
+  buffers->set_ntypes(tp1);
+  flt_t **cutneighsq = buffers->get_cutneighsq();
+
+  // Repeat cutsq calculation because done after call to init_style
+  double cut, cutneigh;
+  for (int i = 1; i <= atom->ntypes; i++) {
+    for (int j = i; j <= atom->ntypes; j++) {
+      if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+	cut = init_one(i,j);
+	cutneigh = cut + neighbor->skin;
+	cutsq[i][j] = cutsq[j][i] = cut*cut;
+	cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
+      }
+    }
+  }
+  
+  _onetype=-1;
+  double oldscale=-1;
+  for (int i = 1; i < tp1; i++) {
+    int ioff = i * fc.frho_stride();
+    for (int k = 0; k < nrho + 1; k++) {
+      fc.frho_spline_f[ioff + k].a = frho_spline[type2frho[i]][k][0];
+      fc.frho_spline_f[ioff + k].b = frho_spline[type2frho[i]][k][1];
+      fc.frho_spline_f[ioff + k].c = frho_spline[type2frho[i]][k][2];
+      fc.frho_spline_e[ioff + k].a = frho_spline[type2frho[i]][k][3];
+      fc.frho_spline_e[ioff + k].b = frho_spline[type2frho[i]][k][4];
+      fc.frho_spline_e[ioff + k].c = frho_spline[type2frho[i]][k][5];
+      fc.frho_spline_e[ioff + k].d = frho_spline[type2frho[i]][k][6];
+    }
+    ioff = i * fc.rhor_istride();
+    for (int j = 1; j < tp1; j++) {
+      fc.scale_f[i][j] = scale[i][j];
+      if (type2rhor[i][j] >= 0) {
+	const int joff = ioff + j * fc.rhor_jstride();
+	for (int k = 0; k < nr + 1; k++) {
+	  if (type2rhor[j][i] != type2rhor[i][j])
+	    _onetype = 0;
+          else if (_onetype < 0)
+	    _onetype = i;
+          if (oldscale < 0)
+            oldscale = scale[i][j];
+          else
+	    if (oldscale != scale[i][j])
+	      _onetype = 0;
+	  fc.rhor_spline_f[joff + k].a=rhor_spline[type2rhor[j][i]][k][0];
+	  fc.rhor_spline_f[joff + k].b=rhor_spline[type2rhor[j][i]][k][1];
+	  fc.rhor_spline_f[joff + k].c=rhor_spline[type2rhor[j][i]][k][2];
+	  fc.rhor_spline_e[joff + k].a=rhor_spline[type2rhor[j][i]][k][3];
+	  fc.rhor_spline_e[joff + k].b=rhor_spline[type2rhor[j][i]][k][4];
+	  fc.rhor_spline_e[joff + k].c=rhor_spline[type2rhor[j][i]][k][5];
+	  fc.rhor_spline_e[joff + k].d=rhor_spline[type2rhor[j][i]][k][6];
+	  fc.z2r_spline_t[joff + k].a=z2r_spline[type2rhor[j][i]][k][0];
+	  fc.z2r_spline_t[joff + k].b=z2r_spline[type2rhor[j][i]][k][1];
+	  fc.z2r_spline_t[joff + k].c=z2r_spline[type2rhor[j][i]][k][2];
+	  fc.z2r_spline_t[joff + k].d=z2r_spline[type2rhor[j][i]][k][3];
+	  fc.z2r_spline_t[joff + k].e=z2r_spline[type2rhor[j][i]][k][4];
+	  fc.z2r_spline_t[joff + k].f=z2r_spline[type2rhor[j][i]][k][5];
+	  fc.z2r_spline_t[joff + k].g=z2r_spline[type2rhor[j][i]][k][6];
+	}
+      }
+    }
+  }
+  if (_onetype < 0) _onetype = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template <class flt_t>
+void PairEAMIntel::ForceConst<flt_t>::set_ntypes(const int ntypes,
+						 const int nr, const int nrho,
+						 Memory *memory,
+						 const int cop) {
+  if (ntypes != _ntypes || nr > _nr || nrho > _nrho) {
+    if (_ntypes > 0) {
+      _memory->destroy(rhor_spline_f);
+      _memory->destroy(rhor_spline_e);
+      _memory->destroy(frho_spline_f);
+      _memory->destroy(frho_spline_e);
+      _memory->destroy(z2r_spline_t);
+      _memory->destroy(scale_f);
+    }
+    if (ntypes > 0) {
+      _cop = cop;
+      _nr = nr + 1;
+      int edge = (_nr * sizeof(flt_t)) % INTEL_DATA_ALIGN;
+      if (edge) _nr += (INTEL_DATA_ALIGN - edge) / sizeof(flt_t);
+      memory->create(rhor_spline_f,ntypes*ntypes*_nr,"fc.rhor_spline_f");
+      memory->create(rhor_spline_e,ntypes*ntypes*_nr,"fc.rhor_spline_e");
+      memory->create(z2r_spline_t,ntypes*ntypes*_nr,"fc.z2r_spline_t");
+      _nrho = nrho + 1;
+      edge = (_nrho * sizeof(flt_t)) % INTEL_DATA_ALIGN;
+      if (edge) _nrho += (INTEL_DATA_ALIGN - edge) / sizeof(flt_t);
+      memory->create(frho_spline_f,ntypes*_nrho,"fc.frho_spline_f");
+      memory->create(frho_spline_e,ntypes*_nrho,"fc.frho_spline_e");
+      memory->create(scale_f,ntypes,ntypes,"fc.scale_f");
+    }
+  }
+  _ntypes = ntypes;
+  _memory = memory;
+}
+
+/* ---------------------------------------------------------------------- */
+
+int PairEAMIntel::pack_forward_comm(int n, int *list, double *buf,
+				    int pbc_flag, int *pbc)
+{
+  if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
+    return pack_forward_comm(n, list, buf, fp);
+  else
+    return pack_forward_comm(n, list, buf, fp_float);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairEAMIntel::unpack_forward_comm(int n, int first, double *buf)
+{
+  if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
+    unpack_forward_comm(n, first, buf, fp);
+  else
+    unpack_forward_comm(n, first, buf, fp_float);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class flt_t>
+int PairEAMIntel::pack_forward_comm(int n, int *list, double *buf,
+				    flt_t *fp_f)
+{
+  int i,j,m;
+
+  m = 0;
+  for (i = 0; i < n; i++) {
+    j = list[i];
+    buf[m++] = fp_f[j];
+  }
+  return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class flt_t>
+void PairEAMIntel::unpack_forward_comm(int n, int first, double *buf, 
+				       flt_t *fp_f)
+{
+  int i,m,last;
+
+  m = 0;
+  last = first + n;
+  for (i = first; i < last; i++) fp_f[i] = buf[m++];
+}
+
--- a/src/USER-INTEL/pair_eam_intel.h
+++ b/src/USER-INTEL/pair_eam_intel.h
@ -0,0 +1,107 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(eam/intel,PairEAMIntel)
+
+#else
+
+#ifndef LMP_PAIR_EAM_INTEL_H
+#define LMP_PAIR_EAM_INTEL_H
+
+#include <stdio.h>
+#include "pair_eam.h"
+#include "fix_intel.h"
+
+namespace LAMMPS_NS {
+
+
+class PairEAMIntel : public PairEAM {
+ public:
+  friend class FixSemiGrandCanonicalMC;   // Alex Stukowski option
+
+  PairEAMIntel(class LAMMPS *);
+  virtual ~PairEAMIntel();
+  virtual void compute(int, int);
+  void init_style();
+  int pack_forward_comm(int, int *, double *, int, int *);
+  void unpack_forward_comm(int, int, double *);
+
+ protected:
+
+  FixIntel *fix;
+  int _cop, _onetype;
+  float *fp_float;
+
+  template <class flt_t>
+  int pack_forward_comm(int, int *, double *, flt_t *);
+  template <class flt_t>
+  void unpack_forward_comm(int, int, double *, flt_t *);
+
+  template <class flt_t> class ForceConst;
+  template <class flt_t, class acc_t>
+  void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
+               const ForceConst<flt_t> &fc);
+  template <int ONETYPE, int EVFLAG, int EFLAG, int NEWTON_PAIR, class flt_t, 
+	    class acc_t>
+  void eval(const int offload, const int vflag,
+            IntelBuffers<flt_t,acc_t> * buffers,
+            const ForceConst<flt_t> &fc, const int astart, const int aend);
+
+  template <class flt_t, class acc_t>
+  void pack_force_const(ForceConst<flt_t> &fc,
+                        IntelBuffers<flt_t, acc_t> *buffers);
+
+  // ----------------------------------------------------------------------
+
+  template <class flt_t>
+  class ForceConst {
+  public:
+    typedef struct { flt_t a, b, c, d; } fc_packed1;
+    typedef struct { flt_t a, b, c, d, e, f, g, h; } fc_packed2;
+
+    flt_t **scale_f;
+    fc_packed1 *rhor_spline_f, *rhor_spline_e;
+    fc_packed1 *frho_spline_f, *frho_spline_e;
+    fc_packed2 *z2r_spline_t;
+
+    ForceConst() : _ntypes(0), _nr(0)  {}
+    ~ForceConst() { set_ntypes(0, 0, 0, NULL, _cop); }
+
+    void set_ntypes(const int ntypes, const int nr, const int nrho, 
+		    Memory *memory, const int cop);
+    inline int rhor_jstride() const { return _nr; }
+    inline int rhor_istride() const { return _nr * _ntypes; }
+    inline int frho_stride() const { return _nrho; }
+
+  private:
+    int _ntypes, _nr, _nrho, _cop;
+    Memory *_memory;
+  };
+  ForceConst<float> force_const_single;
+  ForceConst<double> force_const_double;
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: The 'package intel' command is required for /intel styles
+
+Self-explanatory.
+
+*/
--- a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp
+++ b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp
@ -171,6 +171,15 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
  const int ntypes = atom->ntypes + 1;
  const int eatom = this->eflag_atom;

+  flt_t * _noalias const ccachex = buffers->get_ccachex();
+  flt_t * _noalias const ccachey = buffers->get_ccachey();
+  flt_t * _noalias const ccachez = buffers->get_ccachez();
+  flt_t * _noalias const ccachew = buffers->get_ccachew();
+  int * _noalias const ccachei = buffers->get_ccachei();
+  int * _noalias const ccachej = buffers->get_ccachej();
+  const int ccache_stride = _ccache_stride;
+
+
  // Determine how much data to transfer
  int x_size, q_size, f_stride, ev_size, separate_flag;
  IP_PRE_get_transfern(ago, NEWTON_PAIR, EVFLAG, EFLAG, vflag,
@ -208,8 +217,10 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
    in(x:length(x_size) alloc_if(0) free_if(0)) \
    in(q:length(q_size) alloc_if(0) free_if(0)) \
    in(overflow:length(0) alloc_if(0) free_if(0)) \
-    in(nthreads,qqrd2e,g_ewald,inum,nall,ntypes,cut_coulsq,vflag,eatom) \
-    in(f_stride,separate_flag,offload) \
+    in(ccachex,ccachey,ccachez,ccachew:length(0) alloc_if(0) free_if(0)) \
+    in(ccachei,ccachej:length(0) alloc_if(0) free_if(0)) \
+    in(ccache_stride,nthreads,qqrd2e,g_ewald,inum,nall,ntypes,cut_coulsq) \
+    in(vflag,eatom,f_stride,separate_flag,offload) \
    in(astart,cut_ljsq,cut_lj_innersq,nlocal,inv_denom_lj,minlocal) \
    out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
    out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
@ -246,6 +257,14 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
      memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
      flt_t cutboth = cut_coulsq;

+      const int toffs = tid * ccache_stride;
+      flt_t * _noalias const tdelx = ccachex + toffs;
+      flt_t * _noalias const tdely = ccachey + toffs;
+      flt_t * _noalias const tdelz = ccachez + toffs;
+      flt_t * _noalias const trsq = ccachew + toffs;
+      int * _noalias const tj = ccachei + toffs;
+      int * _noalias const tjtype = ccachej + toffs;
+
      for (int i = iifrom; i < iito; ++i) {
 	//        const int i = ilist[ii];
        const int itype = x[i].w;
@ -270,80 +289,93 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
 	  if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
 	}

+	int ej = 0;
+        #if defined(LMP_SIMD_COMPILER)
+	#pragma vector aligned
+	#pragma ivdep
+        #endif
+        for (int jj = 0; jj < jnum; jj++) {
+          const int j = jlist[jj] & NEIGHMASK;
+	  const flt_t delx = xtmp - x[j].x;
+          const flt_t dely = ytmp - x[j].y;
+          const flt_t delz = ztmp - x[j].z;
+          const flt_t rsq = delx * delx + dely * dely + delz * delz;
+
+	  if (rsq < cut_coulsq) {
+	    trsq[ej]=rsq;
+	    tdelx[ej]=delx;
+	    tdely[ej]=dely;
+	    tdelz[ej]=delz;
+	    tjtype[ej]=x[j].w;
+	    tj[ej]=jlist[jj];
+	    ej++;
+	  }
+	}
+
        #if defined(LMP_SIMD_COMPILER)
 	#pragma vector aligned
 	#pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, secoul, \
 	                       sv0, sv1, sv2, sv3, sv4, sv5)
        #endif
-        for (int jj = 0; jj < jnum; jj++) {
+        for (int jj = 0; jj < ej; jj++) {
          flt_t forcecoul, forcelj, evdwl, ecoul;
          forcecoul = forcelj = evdwl = ecoul = (flt_t)0.0;

-          const int sbindex = jlist[jj] >> SBBITS & 3;
-          const int j = jlist[jj] & NEIGHMASK;
-
-          const flt_t delx = xtmp - x[j].x;
-          const flt_t dely = ytmp - x[j].y;
-          const flt_t delz = ztmp - x[j].z;
-          const int jtype = x[j].w;
-          const flt_t rsq = delx * delx + dely * dely + delz * delz;
+	  const int j = tj[jj] & NEIGHMASK;
+          const int sbindex = tj[jj] >> SBBITS & 3;
+	  const int jtype = tjtype[jj];
+	  const flt_t rsq = trsq[jj];
          const flt_t r2inv = (flt_t)1.0 / rsq;

-	  #ifdef INTEL_VMASK
-	  if (rsq < cut_coulsq) {
+          #ifdef INTEL_ALLOW_TABLE
+          if (!ncoultablebits || rsq <= tabinnersq) {
          #endif
-            #ifdef INTEL_ALLOW_TABLE
-            if (!ncoultablebits || rsq <= tabinnersq) {
-            #endif
-              const flt_t A1 =  0.254829592;
-              const flt_t A2 = -0.284496736;
-              const flt_t A3 =  1.421413741;
-              const flt_t A4 = -1.453152027;
-              const flt_t A5 =  1.061405429;
-              const flt_t EWALD_F = 1.12837917;
-              const flt_t INV_EWALD_P = 1.0 / 0.3275911;
+            const flt_t A1 =  0.254829592;
+	    const flt_t A2 = -0.284496736;
+	    const flt_t A3 =  1.421413741;
+	    const flt_t A4 = -1.453152027;
+	    const flt_t A5 =  1.061405429;
+	    const flt_t EWALD_F = 1.12837917;
+	    const flt_t INV_EWALD_P = 1.0 / 0.3275911;

-              const flt_t r = (flt_t)1.0 / sqrt(r2inv);
-              const flt_t grij = g_ewald * r;
-              const flt_t expm2 = exp(-grij * grij);
-              const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij);
-              const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
-              const flt_t prefactor = qqrd2e * qtmp * q[j] / r;
-              forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
-              if (EFLAG) ecoul = prefactor * erfc;
+	    const flt_t r = (flt_t)1.0 / sqrt(r2inv);
+	    const flt_t grij = g_ewald * r;
+	    const flt_t expm2 = exp(-grij * grij);
+	    const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij);
+	    const flt_t erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
+	    const flt_t prefactor = qqrd2e * qtmp * q[j] / r;
+	    forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
+	    if (EFLAG) ecoul = prefactor * erfc;

-	      const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
+	    const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
+	      prefactor;
+	    forcecoul -= adjust;
+	    if (EFLAG) ecoul -= adjust;
+
+          #ifdef INTEL_ALLOW_TABLE
+	  } else {
+	    float rsq_lookup = rsq;
+	    const int itable = (__intel_castf32_u32(rsq_lookup) &
+	                        ncoulmask) >> ncoulshiftbits;
+	    const flt_t fraction = (rsq_lookup - table[itable].r) *
+	      table[itable].dr;
+
+	    const flt_t tablet = table[itable].f +
+	      fraction * table[itable].df;
+	    forcecoul = qtmp * q[j] * tablet;
+	    if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] +
+	                       fraction * detable[itable]);
+	    if (sbindex) {
+   	      const flt_t table2 = ctable[itable] +
+		fraction * dctable[itable];
+	      const flt_t prefactor = qtmp * q[j] * table2;
+	      const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) *
 		prefactor;
 	      forcecoul -= adjust;
 	      if (EFLAG) ecoul -= adjust;
-
-            #ifdef INTEL_ALLOW_TABLE
-            } else {
-              float rsq_lookup = rsq;
-              const int itable = (__intel_castf32_u32(rsq_lookup) &
-                  ncoulmask) >> ncoulshiftbits;
-              const flt_t fraction = (rsq_lookup - table[itable].r) *
-                  table[itable].dr;
-
-              const flt_t tablet = table[itable].f +
-                  fraction * table[itable].df;
-              forcecoul = qtmp * q[j] * tablet;
-              if (EFLAG) ecoul = qtmp * q[j] * (etable[itable] +
-                  fraction * detable[itable]);
-              if (sbindex) {
-                const flt_t table2 = ctable[itable] +
-                    fraction * dctable[itable];
-                const flt_t prefactor = qtmp * q[j] * table2;
-                const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex]) *
-                    prefactor;
-                forcecoul -= adjust;
-                if (EFLAG) ecoul -= adjust;
-              }
-            }
-            #endif
-	  #ifdef INTEL_VMASK
-	  }
-	  #endif
+	    }
+          }
+          #endif

 	  #ifdef INTEL_VMASK
 	  if (rsq < cut_ljsq) {
@ -393,43 +425,40 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
 	  if (rsq > cut_ljsq) { forcelj = (flt_t)0.0; evdwl = (flt_t)0.0; }
 	  #endif

-	  #ifdef INTEL_VMASK
-	  if (rsq < cut_coulsq) {
-	  #endif
-            const flt_t fpair = (forcecoul + forcelj) * r2inv;
-            fxtmp += delx * fpair;
-            fytmp += dely * fpair;
-            fztmp += delz * fpair;
-            if (NEWTON_PAIR || j < nlocal) {
-              f[j].x -= delx * fpair;
-              f[j].y -= dely * fpair;
-              f[j].z -= delz * fpair;
-            }
-
-            if (EVFLAG) {
-              flt_t ev_pre = (flt_t)0;
-              if (NEWTON_PAIR || i < nlocal)
-                ev_pre += (flt_t)0.5;
-              if (NEWTON_PAIR || j < nlocal)
-                ev_pre += (flt_t)0.5;
-
-              if (EFLAG) {
-                sevdwl += ev_pre * evdwl;
-                secoul += ev_pre * ecoul;
-                if (eatom) {
-                  if (NEWTON_PAIR || i < nlocal)
-                    fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
-                  if (NEWTON_PAIR || j < nlocal)
-                    f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
-                }
-              }
-
-	      IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
-				   delx, dely, delz);
-            }
-	  #ifdef INTEL_VMASK
+	  const flt_t delx = tdelx[jj];
+	  const flt_t dely = tdely[jj];
+	  const flt_t delz = tdelz[jj];
+	  const flt_t fpair = (forcecoul + forcelj) * r2inv;
+	  fxtmp += delx * fpair;
+	  fytmp += dely * fpair;
+	  fztmp += delz * fpair;
+	  if (NEWTON_PAIR || j < nlocal) {
+	    f[j].x -= delx * fpair;
+	    f[j].y -= dely * fpair;
+	    f[j].z -= delz * fpair;
+	  }
+
+	  if (EVFLAG) {
+	    flt_t ev_pre = (flt_t)0;
+	    if (NEWTON_PAIR || i < nlocal)
+	      ev_pre += (flt_t)0.5;
+	    if (NEWTON_PAIR || j < nlocal)
+	      ev_pre += (flt_t)0.5;
+	    
+	    if (EFLAG) {
+	      sevdwl += ev_pre * evdwl;
+	      secoul += ev_pre * ecoul;
+	      if (eatom) {
+		if (NEWTON_PAIR || i < nlocal)
+		  fwtmp += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
+		if (NEWTON_PAIR || j < nlocal)
+		  f[j].w += (flt_t)0.5 * evdwl + (flt_t)0.5 * ecoul;
+	      }
+	    }
+
+	    IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair,
+				 delx, dely, delz);
 	  }
-	  #endif
        } // for jj
        f[i].x += fxtmp;
        f[i].y += fytmp;
@ -517,6 +546,13 @@ void PairLJCharmmCoulLongIntel::pack_force_const(ForceConst<flt_t> &fc,
  if (ncoultablebits)
    for (int i = 0; i < ncoultablebits; i++) ntable *= 2;

+  int off_ccache = 0;
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (_cop >= 0) off_ccache = 1;
+  #endif
+  buffers->grow_ccache(off_ccache, comm->nthreads, 1);
+  _ccache_stride = buffers->ccache_stride();
+
  fc.set_ntypes(tp1, ntable, memory, _cop);
  buffers->set_ntypes(tp1);
  flt_t **cutneighsq = buffers->get_cutneighsq();
--- a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.h
+++ b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.h
@ -42,7 +42,7 @@ class PairLJCharmmCoulLongIntel : public PairLJCharmmCoulLong {

 private:
  FixIntel *fix;
-  int _cop, _lrt;
+  int _cop, _lrt, _ccache_stride;

  template <class flt_t> class ForceConst;
  template <class flt_t, class acc_t>
--- a/src/read_dump.cpp
+++ b/src/read_dump.cpp
@ -48,7 +48,7 @@ using namespace LAMMPS_NS;

 // also in reader_native.cpp

-enum{ID,TYPE,X,Y,Z,VX,VY,VZ,Q,IX,IY,IZ};
+enum{ID,TYPE,X,Y,Z,VX,VY,VZ,Q,IX,IY,IZ,FX,FY,FZ};
 enum{UNSET,NOSCALE_NOWRAP,NOSCALE_WRAP,SCALE_NOWRAP,SCALE_WRAP};

 /* ---------------------------------------------------------------------- */
@ -607,7 +607,8 @@ int ReadDump::fields_and_keywords(int narg, char **arg)

  if (dimension == 2) {
    for (int i = 0; i < nfield; i++)
-      if (fieldtype[i] == Z || fieldtype[i] == VZ || fieldtype[i] == IZ)
+      if (fieldtype[i] == Z || fieldtype[i] == VZ || 
+          fieldtype[i] == IZ || fieldtype[i] == FZ)
        error->all(FLERR,"Illegal read_dump command");
  }

@ -719,6 +720,9 @@ int ReadDump::whichtype(char *str)
  else if (strcmp(str,"ix") == 0) type = IX;
  else if (strcmp(str,"iy") == 0) type = IY;
  else if (strcmp(str,"iz") == 0) type = IZ;
+  else if (strcmp(str,"fx") == 0) type = FX;
+  else if (strcmp(str,"fy") == 0) type = FY;
+  else if (strcmp(str,"fz") == 0) type = FZ;
  return type;
 }

@ -741,6 +745,7 @@ void ReadDump::process_atoms(int n)
  double **x = atom->x;
  double **v = atom->v;
  double *q = atom->q;
+  double **f = atom->f;
  imageint *image = atom->image;
  int nlocal = atom->nlocal;
  tagint map_tag_max = atom->map_tag_max;
@ -805,6 +810,15 @@ void ReadDump::process_atoms(int n)
        case IZ:
          zbox = static_cast<int> (fields[i][ifield]);
          break;
+        case FX:
+          f[m][0] = fields[i][ifield];
+          break;
+        case FY:
+          f[m][1] = fields[i][ifield];
+          break;
+        case FZ:
+          f[m][2] = fields[i][ifield];
+          break;
        }
      }

--- a/src/reader_native.cpp
+++ b/src/reader_native.cpp
@ -24,7 +24,7 @@ using namespace LAMMPS_NS;

 // also in read_dump.cpp

-enum{ID,TYPE,X,Y,Z,VX,VY,VZ,Q,IX,IY,IZ};
+enum{ID,TYPE,X,Y,Z,VX,VY,VZ,Q,IX,IY,IZ,FX,FY,FZ};
 enum{UNSET,NOSCALE_NOWRAP,NOSCALE_WRAP,SCALE_NOWRAP,SCALE_WRAP};

 /* ---------------------------------------------------------------------- */
@ -254,6 +254,13 @@ bigint ReaderNative::read_header(double box[3][3], int &triclinic,
    else if (fieldtype[i] == VZ)
      fieldindex[i] = find_label("vz",nwords,labels);

+    else if (fieldtype[i] == FX)
+      fieldindex[i] = find_label("fx",nwords,labels);
+    else if (fieldtype[i] == FY)
+      fieldindex[i] = find_label("fy",nwords,labels);
+    else if (fieldtype[i] == FZ)
+      fieldindex[i] = find_label("fz",nwords,labels);
+
    else if (fieldtype[i] == Q)
      fieldindex[i] = find_label("q",nwords,labels);

--- a/src/version.h
+++ b/src/version.h
@ -1 +1 @@
-#define LAMMPS_VERSION "6 Jan 2017"
+#define LAMMPS_VERSION "9 Jan 2017"
				`@ -0,0 +1 @@`
				`‘-ÂØ×Á’@47h<²5@¸¿¦ÕKâ7@ÑR½]<5D>ô'Àéë nÒã@ÝŒIœH½÷¿¯?ó¨Ûè?r€I¨°.é¿š^×ÐêÞ?Wå£ò:È¿(Oã%º?ËNs•?öÄ¿<C384>:‘Cþ…¬?<3F>ËÃ:,H?à}‚cÈÔp¿£<C2BF>ê¬7g«¿`