diff --git a/bench/FERMI/README b/bench/FERMI/README
index d8c2f47497..b81356dee8 100644
--- a/bench/FERMI/README
+++ b/bench/FERMI/README
@@ -19,6 +19,34 @@ directories for instructions on how to build the packages with
 different precisions.  The GPU and USER-CUDA sub-sections of the
 doc/Section_accelerate.html file also describes this process.
 
+Make.py -d ~/lammps -j 16 -p #all orig -m linux -o cpu exe
+Make.py -d ~/lammps -j 16 -p #all opt orig -m linux -o opt exe
+Make.py -d ~/lammps -j 16 -p #all omp orig -m linux -o omp exe
+Make.py -d ~/lammps -j 16 -p #all gpu orig -m linux \
+        -gpu mode=double arch=20 -o gpu_double libs exe
+Make.py -d ~/lammps -j 16 -p #all gpu orig -m linux \
+        -gpu mode=mixed arch=20 -o gpu_mixed libs exe
+Make.py -d ~/lammps -j 16 -p #all gpu orig -m linux \
+        -gpu mode=single arch=20 -o gpu_single libs exe
+Make.py -d ~/lammps -j 16 -p #all cuda orig -m linux \
+        -cuda mode=double arch=20 -o cuda_double libs exe
+Make.py -d ~/lammps -j 16 -p #all cuda orig -m linux \
+        -cuda mode=mixed arch=20 -o cuda_mixed libs exe
+Make.py -d ~/lammps -j 16 -p #all cuda orig -m linux \
+        -cuda mode=single arch=20 -o cuda_single libs exe
+Make.py -d ~/lammps -j 16 -p #all intel orig -m linux -o intel_cpu exe
+Make.py -d ~/lammps -j 16 -p #all kokkos orig -m linux -o kokkos_omp exe
+Make.py -d ~/lammps -j 16 -p #all kokkos orig -kokkos cuda arch=20 \
+        -m cuda -o kokkos_cuda exe
+
+Make.py -d ~/lammps -j 16 -p #all opt omp gpu cuda intel kokkos orig \
+        -gpu mode=double arch=20 -cuda mode=double arch=20 -m linux \
+        -o all libs exe
+
+Make.py -d ~/lammps -j 16 -p #all opt omp gpu cuda intel kokkos orig \
+        -kokkos cuda arch=20 -gpu mode=double arch=20 \
+        -cuda mode=double arch=20 -m cuda -o all_cuda libs exe
+
 ------------------------------------------------------------------------
 
 To run on just CPUs (without using the GPU or USER-CUDA styles),
diff --git a/doc/Section_start.txt b/doc/Section_start.txt
index b07ef4485d..930e2814fc 100644
--- a/doc/Section_start.txt
+++ b/doc/Section_start.txt
@@ -87,26 +87,31 @@ Building LAMMPS can be simple or not-so-simple.  If all you need are
 the default packages installed in LAMMPS, and MPI is already installed
 on your machine, or you just want to run LAMMPS in serial, then you
 can typically use the Makefile.mpi or Makefile.serial files in
-src/MAKE and type one of these lines (from the src dir):
+src/MAKE by typing one of these lines (from the src dir):
 
 make mpi
 make serial :pre
 
-Or if one of the other Makefile.machine files in the src/MAKE
-sub-directories matches your system (type "make" to see a list), you
-can use it as-is by typing (for example):
+Note that on a facility supercomputer, there are often "modules"
+loaded in your environment that provide the compilers and MPI you
+should use.  In this case, the "mpicxx" compile/link command in
+Makefile.mpi should just work by accessing those modules.
+
+If one of the other Makefile.machine files in the src/MAKE
+sub-directories is a better match to your system (type "make" to see
+a list), you can use it as-is by typing (for example):
 
 make stampede :pre
 
-If any of these builds with an existing Makefile.machine works on your
-system, then you're done!
+If any of these builds (with an existing Makefile.machine) works on
+your system, then you're done!
 
-If you want to do one of these:
+If you want to do one of the following:
 
 use optional LAMMPS features that require additional libraries
 use optional packages that require additional libraries
 use optional accelerator packages that require special compiler/linker settings
-run on a specialized platform like a supercomputer that has its own compilers, settings, or other libs to use :ul
+run on a specialized platform that has its own compilers, settings, or other libs to use :ul
 
 then building LAMMPS is more complicated.  You may need to find where
 auxiliary libraries exist on your machine or install them if they
@@ -135,9 +140,9 @@ please post the issue to the "LAMMPS mail
 list"_http://lammps.sandia.gov/mail.html.
 
 If you succeed in building LAMMPS on a new kind of machine, for which
-there isn't a similar machine Makefile included in the src/MAKE/MORE
-directory, then send it to the developers and we can include it in the
-LAMMPS distribution.
+there isn't a similar machine Makefile included in the
+src/MAKE/MACHINES directory, then send your Makefile to the developers
+and we can include it in the LAMMPS distribution.
 
 :line
 
diff --git a/examples/README b/examples/README
index fe40a0562e..67aae4b3b7 100644
--- a/examples/README
+++ b/examples/README
@@ -58,12 +58,12 @@ section of the LAMMPS WWW Site.
 These are the sample problems and their output in the various
 sub-directories:
 
+accelerate:  use of all the various accelerator packages
 balance:  dynamic load balancing, 2d system
 body:     body particles, 2d system
 colloid:  big colloid particles in a small particle solvent, 2d system
 comb:	  models using the COMB potential
 crack:	  crack propagation in a 2d solid
-cuda:     use of the USER-CUDA package for GPU acceleration
 deposit:  deposition of atoms and molecules onto a 3d substrate
 dipole:   point dipolar particles, 2d system
 dreiding: methanol via Dreiding FF
@@ -71,12 +71,9 @@ eim:      NaCl using the EIM potential
 ellipse:  ellipsoidal particles in spherical solvent, 2d system
 flow:	  Couette and Poiseuille flow in a 2d channel
 friction: frictional contact of spherical asperities between 2d surfaces
-gpu:      use of the GPU package for GPU acceleration
 hugoniostat: Hugoniostat shock dynamics
 indent:	  spherical indenter into a 2d solid
-intel:    use of the USER-INTEL package for CPU or Xeon Phi acceleration
 kim:      use of potentials in Knowledge Base for Interatomic Models (KIM)
-kokkos:   use of the KOKKOS package for multi-threading and GPU acceleration
 meam:	  MEAM test for SiC and shear (same as shear examples)
 melt:	  rapid melt of 3d LJ system
 micelle:  self-assembly of small lipid-like molecules into 2d bilayers
diff --git a/lib/cuda/Makefile.defaults b/lib/cuda/Makefile.defaults
index 98628750ec..3e50b7b229 100644
--- a/lib/cuda/Makefile.defaults
+++ b/lib/cuda/Makefile.defaults
@@ -1,6 +1,6 @@
 
 #precision setting: 1 single, 2 double, 4 mixed
-precision ?= 1
+precision ?= 2
 
 #verbose setting: 0 no, 1 yes
 verbose ?= 1
diff --git a/lib/cuda/Makefile.lammps b/lib/cuda/Makefile.lammps
index 593a924182..a488404f31 100644
--- a/lib/cuda/Makefile.lammps
+++ b/lib/cuda/Makefile.lammps
@@ -1,5 +1,5 @@
 # Settings that the LAMMPS build will import when this package library is used
-CUDA_FLAGS := -I/usr/local/cuda/include -DUNIX  -DFFT_CUFFT -DCUDA_PRECISION=1 -DCUDA_ARCH=20 
+CUDA_FLAGS := -I/usr/local/cuda/include -DUNIX  -DFFT_CUFFT -DCUDA_PRECISION=2 -DCUDA_ARCH=20 
 CUDA_USRLIB_CONDITIONAL := -L/usr/local/cuda/lib -L/usr/local/cuda/lib64 -lcufft
  
 user-cuda_SYSINC = ${CUDA_FLAGS}