Merge branch 'master' into math_eigen

2020-09-09 14:56:28 -07:00 · 2020-09-09 14:56:28 -07:00 · 3bacf97468
parent 2ab0878c9e cdd9d693ad
commit 3bacf97468
64 changed files with 5198 additions and 3338 deletions
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -318,7 +318,7 @@ if(WITH_JPEG)
  find_package(JPEG REQUIRED)
  target_compile_definitions(lammps PRIVATE -DLAMMPS_JPEG)
  if(CMAKE_VERSION VERSION_LESS 3.12)
-    target_include_directories(lammps PRIVATE ${JPEG_INCLUDE_DIR})
+    target_include_directories(lammps PRIVATE ${JPEG_INCLUDE_DIRS})
    target_link_libraries(lammps PRIVATE ${JPEG_LIBRARIES})
  else()
    target_link_libraries(lammps PRIVATE JPEG::JPEG)
--- a/cmake/Modules/Packages/KSPACE.cmake
+++ b/cmake/Modules/Packages/KSPACE.cmake
@ -19,16 +19,16 @@ if(FFT STREQUAL "FFTW3")
  find_package(${FFTW} REQUIRED)
  target_compile_definitions(lammps PRIVATE -DFFT_FFTW3)
  target_link_libraries(lammps PRIVATE ${FFTW}::${FFTW})
-  if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY)
+  if(FFTW3_OMP_LIBRARIES OR FFTW3F_OMP_LIBRARIES)
    option(FFT_FFTW_THREADS "Use threaded FFTW library" ON)
  else()
    option(FFT_FFTW_THREADS "Use threaded FFT library" OFF)
  endif()

  if(FFT_FFTW_THREADS)
-    if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY)
-	target_compile_definitions(lammps PRIVATE -DFFT_FFTW_THREADS)
-	target_link_libraries(lammps PRIVATE ${FFTW}::${FFTW}_OMP)
+    if(FFTW3_OMP_LIBRARIES OR FFTW3F_OMP_LIBRARIES)
+      target_compile_definitions(lammps PRIVATE -DFFT_FFTW_THREADS)
+      target_link_libraries(lammps PRIVATE ${FFTW}::${FFTW}_OMP)
    else()
      message(FATAL_ERROR "Need OpenMP enabled FFTW3 library for FFT_THREADS")
    endif()
--- a/cmake/Modules/Packages/MSCG.cmake
+++ b/cmake/Modules/Packages/MSCG.cmake
@ -38,7 +38,7 @@ if(DOWNLOAD_MSCG)
 else()
  find_package(MSCG)
  if(NOT MSCG_FOUND)
-    message(FATAL_ERROR "MSCG not found, help CMake to find it by setting MSCG_LIBRARY and MSCG_INCLUDE_DIRS, or set DOWNLOAD_MSCG=ON to download it")
+    message(FATAL_ERROR "MSCG not found, help CMake to find it by setting MSCG_LIBRARY and MSCG_INCLUDE_DIR, or set DOWNLOAD_MSCG=ON to download it")
  endif()
  target_link_libraries(lammps PRIVATE MSCG::MSCG)
 endif()
--- a/cmake/Modules/Packages/PYTHON.cmake
+++ b/cmake/Modules/Packages/PYTHON.cmake
@ -1,7 +1,7 @@
 if(CMAKE_VERSION VERSION_LESS 3.12)
  find_package(PythonLibs REQUIRED) # Deprecated since version 3.12
-  target_include_directories(lammps PRIVATE ${PYTHON_INCLUDE_DIR})
-  target_link_libraries(lammps PRIVATE ${PYTHON_LIBRARY})
+  target_include_directories(lammps PRIVATE ${PYTHON_INCLUDE_DIRS})
+  target_link_libraries(lammps PRIVATE ${PYTHON_LIBRARIES})
 else()
  find_package(Python REQUIRED COMPONENTS Development)
  target_link_libraries(lammps PRIVATE Python::Python)
--- a/cmake/Modules/Packages/USER-MOLFILE.cmake
+++ b/cmake/Modules/Packages/USER-MOLFILE.cmake
@ -1,4 +1,5 @@
-set(MOLFILE_INCLUDE_DIRS "${LAMMPS_LIB_SOURCE_DIR}/molfile" CACHE STRING "Path to VMD molfile plugin headers")
+set(MOLFILE_INCLUDE_DIR "${LAMMPS_LIB_SOURCE_DIR}/molfile" CACHE STRING "Path to VMD molfile plugin headers")
+set(MOLFILE_INCLUDE_DIRS "${MOLFILE_INCLUDE_DIR}")
 add_library(molfile INTERFACE)
 target_include_directories(molfile INTERFACE ${MOLFILE_INCLUDE_DIRS})
 # no need to link with -ldl on windows
--- a/doc/doxygen/Doxyfile.in
+++ b/doc/doxygen/Doxyfile.in
@ -410,25 +410,28 @@ WARN_LOGFILE           = "../doxygen-warn.log"
 # spaces. See also FILE_PATTERNS and EXTENSION_MAPPING
 # Note: If this tag is empty the current directory is searched.

-INPUT                  = @LAMMPS_SOURCE_DIR@/utils.cpp      \
-                         @LAMMPS_SOURCE_DIR@/utils.h        \
-                         @LAMMPS_SOURCE_DIR@/library.cpp    \
-                         @LAMMPS_SOURCE_DIR@/library.h      \
-                         @LAMMPS_SOURCE_DIR@/lammps.cpp     \
-                         @LAMMPS_SOURCE_DIR@/lammps.h       \
-                         @LAMMPS_SOURCE_DIR@/lmptype.h      \
-                         @LAMMPS_SOURCE_DIR@/pointers.h     \
-                         @LAMMPS_SOURCE_DIR@/atom.cpp       \
-                         @LAMMPS_SOURCE_DIR@/atom.h         \
-                         @LAMMPS_SOURCE_DIR@/input.cpp      \
-                         @LAMMPS_SOURCE_DIR@/input.h        \
-                         @LAMMPS_SOURCE_DIR@/tokenizer.cpp  \
-                         @LAMMPS_SOURCE_DIR@/tokenizer.h    \
-                         @LAMMPS_SOURCE_DIR@/math_eigen.h    \
-                         @LAMMPS_SOURCE_DIR@/text_file_reader.cpp  \
-                         @LAMMPS_SOURCE_DIR@/text_file_reader.h    \
-                         @LAMMPS_SOURCE_DIR@/potential_file_reader.cpp  \
-                         @LAMMPS_SOURCE_DIR@/potential_file_reader.h    \
+INPUT                  = @LAMMPS_SOURCE_DIR@/utils.cpp                 \
+                         @LAMMPS_SOURCE_DIR@/utils.h                   \
+                         @LAMMPS_SOURCE_DIR@/library.cpp               \
+                         @LAMMPS_SOURCE_DIR@/library.h                 \
+                         @LAMMPS_SOURCE_DIR@/lammps.cpp                \
+                         @LAMMPS_SOURCE_DIR@/lammps.h                  \
+                         @LAMMPS_SOURCE_DIR@/lmptype.h                 \
+                         @LAMMPS_SOURCE_DIR@/atom.cpp                  \
+                         @LAMMPS_SOURCE_DIR@/atom.h                    \
+                         @LAMMPS_SOURCE_DIR@/input.cpp                 \
+                         @LAMMPS_SOURCE_DIR@/input.h                   \
+                         @LAMMPS_SOURCE_DIR@/tokenizer.cpp             \
+                         @LAMMPS_SOURCE_DIR@/tokenizer.h               \
+                         @LAMMPS_SOURCE_DIR@/text_file_reader.cpp      \
+                         @LAMMPS_SOURCE_DIR@/text_file_reader.h        \
+                         @LAMMPS_SOURCE_DIR@/potential_file_reader.cpp \
+                         @LAMMPS_SOURCE_DIR@/potential_file_reader.h   \
+                         @LAMMPS_SOURCE_DIR@/my_page.cpp               \
+                         @LAMMPS_SOURCE_DIR@/my_page.h                 \
+                         @LAMMPS_SOURCE_DIR@/my_pool_chunk.cpp         \
+                         @LAMMPS_SOURCE_DIR@/my_pool_chunk.h           \
+                         @LAMMPS_SOURCE_DIR@/math_eigen.h              \

 # The EXCLUDE_SYMLINKS tag can be used to select whether or not files or
 # directories that are symbolic links (a Unix file system feature) are excluded
--- a/doc/src/Build_basics.rst
+++ b/doc/src/Build_basics.rst
@ -32,74 +32,80 @@ LAMMPS are also written with support for shared memory parallelization
 using the `OpenMP <https://en.wikipedia.org/wiki/OpenMP>`_ threading
 standard. A more detailed discussion of that is below.

-**CMake build**\ :
+.. tabs::

-.. code-block:: bash
+   .. tab:: CMake build

-   -D BUILD_MPI=value        # yes or no, default is yes if CMake finds MPI, else no
-   -D BUILD_OMP=value        # yes or no, default is yes if a compatible compiler is detected
-   -D LAMMPS_MACHINE=name    # name = mpi, serial, mybox, titan, laptop, etc
-                             # no default value
+      .. code-block:: bash

-The executable created by CMake (after running make) is named ``lmp`` unless
-the ``LAMMPS_MACHINE`` option is set.  When setting ``LAMMPS_MACHINE=name``
-the executable will be called ``lmp_name``.  Using ``BUILD_MPI=no`` will
-enforce building a serial executable using the MPI STUBS library.
+         -D BUILD_MPI=value        # yes or no, default is yes if CMake finds MPI, else no
+         -D BUILD_OMP=value        # yes or no, default is yes if a compatible compiler is detected
+         -D LAMMPS_MACHINE=name    # name = mpi, serial, mybox, titan, laptop, etc
+                                   # no default value

-**Traditional make**\ :
+      The executable created by CMake (after running make) is named
+      ``lmp`` unless the ``LAMMPS_MACHINE`` option is set.  When setting
+      ``LAMMPS_MACHINE=name`` the executable will be called
+      ``lmp_name``.  Using ``BUILD_MPI=no`` will enforce building a
+      serial executable using the MPI STUBS library.

-The build with traditional makefiles has to be done inside the source folder ``src``.
+   .. tab:: Traditional make

-.. code-block:: bash
+      The build with traditional makefiles has to be done inside the source folder ``src``.

-   make mpi                # parallel build, produces lmp_mpi using Makefile.mpi
-   make serial             # serial build, produces lmp_serial using Makefile/serial
-   make mybox              # uses Makefile.mybox to produce lmp_mybox
+      .. code-block:: bash

-Any ``make machine`` command will look up the make settings from a file
-``Makefile.machine`` in the folder ``src/MAKE`` or one of its
-sub-directories ``MINE``, ``MACHINES``, or ``OPTIONS``, create a folder
-``Obj_machine`` with all objects and generated files and an executable
-called ``lmp_machine``\ .  The standard parallel build with ``make mpi``
-assumes a standard MPI installation with MPI compiler wrappers where all
-necessary compiler and linker flags to get access and link with the
-suitable MPI headers and libraries are set by the wrapper programs.  For
-other cases or the serial build, you have to adjust the make file
-variables ``MPI_INC``, ``MPI_PATH``, ``MPI_LIB`` as well as ``CC`` and
-``LINK``\ .  To enable OpenMP threading usually a compiler specific flag
-needs to be added to the compile and link commands.  For the GNU
-compilers, this is ``-fopenmp``\ , which can be added to the ``CC`` and
-``LINK`` makefile variables.
+         make mpi                # parallel build, produces lmp_mpi using Makefile.mpi
+         make serial             # serial build, produces lmp_serial using Makefile.serial
+         make mybox              # uses Makefile.mybox to produce lmp_mybox

-For the serial build the following make variables are set (see src/MAKE/Makefile.serial):
+      Any ``make machine`` command will look up the make settings from a
+      file ``Makefile.machine`` in the folder ``src/MAKE`` or one of its
+      sub-directories ``MINE``, ``MACHINES``, or ``OPTIONS``, create a
+      folder ``Obj_machine`` with all objects and generated files and an
+      executable called ``lmp_machine``\ .  The standard parallel build
+      with ``make mpi`` assumes a standard MPI installation with MPI
+      compiler wrappers where all necessary compiler and linker flags to
+      get access and link with the suitable MPI headers and libraries
+      are set by the wrapper programs.  For other cases or the serial
+      build, you have to adjust the make file variables ``MPI_INC``,
+      ``MPI_PATH``, ``MPI_LIB`` as well as ``CC`` and ``LINK``\ .  To
+      enable OpenMP threading usually a compiler specific flag needs to
+      be added to the compile and link commands.  For the GNU compilers,
+      this is ``-fopenmp``\ , which can be added to the ``CC`` and
+      ``LINK`` makefile variables.

-.. code-block:: make
+      For the serial build the following make variables are set (see ``src/MAKE/Makefile.serial``):

-   CC =            g++
-   LINK =          g++
-   MPI_INC =       -I../STUBS
-   MPI_PATH =      -L../STUBS
-   MPI_LIB =       -lmpi_stubs
+      .. code-block:: make

-You also need to build the STUBS library for your platform before making
-LAMMPS itself.  A ``make serial`` build does this for you automatically,
-otherwise, type ``make mpi-stubs`` from the src directory, or ``make``
-from the ``src/STUBS`` dir.  If the build fails, you may need to edit
-the ``STUBS/Makefile`` for your platform.  The stubs library does not
-provide MPI/IO functions required by some LAMMPS packages,
-e.g. ``MPIIO`` or ``USER-LB``, and thus is not compatible with those
-packages.
+         CC =            g++
+         LINK =          g++
+         MPI_INC =       -I../STUBS
+         MPI_PATH =      -L../STUBS
+         MPI_LIB =       -lmpi_stubs

-.. note::
+      You also need to build the STUBS library for your platform before
+      making LAMMPS itself.  A ``make serial`` build does this for you
+      automatically, otherwise, type ``make mpi-stubs`` from the src
+      directory, or ``make`` from the ``src/STUBS`` dir.  If the build
+      fails, you may need to edit the ``STUBS/Makefile`` for your
+      platform.  The stubs library does not provide MPI/IO functions
+      required by some LAMMPS packages, e.g. ``MPIIO`` or ``USER-LB``,
+      and thus is not compatible with those packages.

-   The file ``src/STUBS/mpi.c`` provides a CPU timer function called
-   ``MPI_Wtime()`` that calls ``gettimeofday()``.  If your operating system
-   does not support ``gettimeofday()``, you will need to insert code to
-   call another timer.  Note that the ANSI-standard function ``clock()``
-   rolls over after an hour or so, and is therefore insufficient for
-   timing long LAMMPS simulations.
+      .. note::

-**MPI and OpenMP support info**\ :
+         The file ``src/STUBS/mpi.c`` provides a CPU timer function
+         called ``MPI_Wtime()`` that calls ``gettimeofday()``.  If your
+         operating system does not support ``gettimeofday()``, you will
+         need to insert code to call another timer.  Note that the
+         ANSI-standard function ``clock()`` rolls over after an hour or
+         so, and is therefore insufficient for timing long LAMMPS
+         simulations.
+
+MPI and OpenMP support in LAMMPS
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 If you are installing MPI yourself to build a parallel LAMMPS
 executable, we recommend either MPICH or OpenMPI which are regularly
@ -145,18 +151,19 @@ please refer to its documentation.

 .. _default-none-issues:

-**OpenMP Compiler compatibility info**\ :
+OpenMP Compiler compatibility
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-Some compilers do not fully support the ``default(none)`` directive
-and others (e.g. GCC version 9 and beyond, Clang version 10 and later)
-may implement strict OpenMP 4.0 and later semantics, which are incompatible
+Some compilers do not fully support the ``default(none)`` directive and
+others (e.g. GCC version 9 and beyond, Clang version 10 and later) may
+implement strict OpenMP 4.0 and later semantics, which are incompatible
 with the OpenMP 3.1 semantics used in LAMMPS for maximal compatibility
 with compiler versions in use.  If compilation with OpenMP enabled fails
 because of your compiler requiring strict OpenMP 4.0 semantic, you can
-change the behavior by adding ``-D LAMMPS_OMP_COMPAT=4`` to the ``LMP_INC``
-variable in your makefile, or add it to the command line while configuring
-with CMake. CMake will detect the suitable setting for the GNU, Clang,
-and Intel compilers.
+change the behavior by adding ``-D LAMMPS_OMP_COMPAT=4`` to the
+``LMP_INC`` variable in your makefile, or add it to the command line
+while configuring with CMake. CMake will detect the suitable setting for
+the GNU, Clang, and Intel compilers.

 ----------

@ -185,131 +192,139 @@ for their compile/link environments, you can often access different
 compilers by simply loading the appropriate module before building
 LAMMPS.

-**CMake build**\ :
+.. tabs::

-By default CMake will use a compiler it finds according to internal
-preferences and it will add optimization flags appropriate to that
-compiler and any :doc:`accelerator packages <Speed_packages>` you have
-included in the build.
+   .. tab:: CMake build

-You can tell CMake to look for a specific compiler with setting CMake
-variables (listed below) during configuration.  For a few common
-choices, there are also presets in the ``cmake/presets`` folder.  For
-convenience, there is a ``CMAKE_TUNE_FLAGS`` variable that can be set to
-apply global compiler options (applied to compilation only), to be used
-for adding compiler or host specific optimization flags in addition to
-the "flags" variables listed below. You may also specify the
-corresponding ``CMAKE_*_FLAGS`` variables individually, if you want to
-experiment with alternate optimization flags.  You should specify all 3
-compilers, so that the (few) LAMMPS source files written in C or Fortran
-are built with a compiler consistent with the one used for the C++
-files:
+      By default CMake will use the compiler it finds according to
+      internal preferences and it will add optimization flags
+      appropriate to that compiler and any :doc:`accelerator packages
+      <Speed_packages>` you have included in the build.  CMake will
+      check if the detected or selected compiler is compatible with the
+      C++ support requirements of LAMMPS and stop with an error, if this
+      is not the case.

-.. code-block:: bash
+      You can tell CMake to look for a specific compiler by setting
+      CMake variables (listed below) during configuration.  For a few
+      common choices, there are also presets in the ``cmake/presets``
+      folder.  For convenience, there is a ``CMAKE_TUNE_FLAGS`` variable
+      that can be set to apply global compiler options (applied to
+      compilation only), to be used for adding compiler or host specific
+      optimization flags in addition to the "flags" variables listed
+      below. You may also specify the corresponding ``CMAKE_*_FLAGS``
+      variables individually, if you want to experiment with alternate
+      optimization flags.  You should specify all 3 compilers, so that
+      the (few) LAMMPS source files written in C or Fortran are built
+      with a compiler consistent with the one used for the C++ files:

-   -D CMAKE_CXX_COMPILER=name            # name of C++ compiler
-   -D CMAKE_C_COMPILER=name              # name of C compiler
-   -D CMAKE_Fortran_COMPILER=name        # name of Fortran compiler
+      .. code-block:: bash

-   -D CMAKE_CXX_FLAGS=string             # flags to use with C++ compiler
-   -D CMAKE_C_FLAGS=string               # flags to use with C compiler
-   -D CMAKE_Fortran_FLAGS=string         # flags to use with Fortran compiler
+         -D CMAKE_CXX_COMPILER=name            # name of C++ compiler
+         -D CMAKE_C_COMPILER=name              # name of C compiler
+         -D CMAKE_Fortran_COMPILER=name        # name of Fortran compiler

-A few example command lines are:
+         -D CMAKE_CXX_FLAGS=string             # flags to use with C++ compiler
+         -D CMAKE_C_FLAGS=string               # flags to use with C compiler
+         -D CMAKE_Fortran_FLAGS=string         # flags to use with Fortran compiler

-.. code-block:: bash
+      A few example command lines are:

-   # Building with GNU Compilers:
-   cmake ../cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_Fortran_COMPILER=gfortran
-   # Building with Intel Compilers:
-   cmake ../cmake -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc -DCMAKE_Fortran_COMPILER=ifort
-   # Building with LLVM/Clang Compilers:
-   cmake ../cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_Fortran_COMPILER=flang
+      .. code-block:: bash

-For compiling with the Clang/LLVM compilers a CMake preset is provided that
-can be loaded with `-C ../cmake/presets/clang.cmake`.  Similarly,
-`-C ../cmake/presets/intel.cmake` should switch the
+         # Building with GNU Compilers:
+         cmake ../cmake -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_Fortran_COMPILER=gfortran
+         # Building with Intel Compilers:
+         cmake ../cmake -DCMAKE_C_COMPILER=icc -DCMAKE_CXX_COMPILER=icpc -DCMAKE_Fortran_COMPILER=ifort
+         # Building with LLVM/Clang Compilers:
+         cmake ../cmake -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_Fortran_COMPILER=flang

-In addition you can set ``CMAKE_TUNE_FLAGS`` to specifically add
-compiler flags to tune for optimal performance on given hosts. By
-default this variable is empty.
+      For compiling with the Clang/LLVM compilers a CMake preset is
+      provided that can be loaded with
+      ``-C ../cmake/presets/clang.cmake``.  Similarly,
+      ``-C ../cmake/presets/intel.cmake`` should switch the compiler
+      toolchain to the Intel compilers.

-.. note::
+      In addition you can set ``CMAKE_TUNE_FLAGS`` to specifically add
+      compiler flags to tune for optimal performance on given hosts. By
+      default this variable is empty.

-   When the cmake command completes, it prints a summary to the screen
-   which compilers it is using and what flags and settings will be used
-   for the  compilation.  Note that if the top-level compiler is mpicxx,
-   it is simply a wrapper on a real compiler.  The underlying compiler
-   info is what CMake will try to determine and report.  You should check
-   to confirm you are using the compiler and optimization flags you want.
+      .. note::

-**Makefile.machine settings for traditional make**\ :
+         When the cmake command completes, it prints a summary to the
+         screen showing which compilers it is using and what flags and
+         settings will be used for the compilation.  Note that if the
+         top-level compiler is mpicxx, it is simply a wrapper on a real
+         compiler.  The underlying compiler info is what CMake will try
+         to determine and report.  You should check to confirm you are
+         using the compiler and optimization flags you want.

-The "compiler/linker settings" section of a Makefile.machine lists
-compiler and linker settings for your C++ compiler, including
-optimization flags.  For a parallel build it is recommended to use
-``mpicxx`` or ``mpiCC``, since these compiler wrappers will include a
-variety of settings appropriate for your MPI installation and thus
-avoiding the guesswork of finding the right flags.
+   .. tab:: Makefile.machine settings for traditional make

-Parallel build (see ``src/MAKE/Makefile.mpi``):
+      The "compiler/linker settings" section of a Makefile.machine lists
+      compiler and linker settings for your C++ compiler, including
+      optimization flags.  For a parallel build it is recommended to use
+      ``mpicxx`` or ``mpiCC``, since these compiler wrappers will
+      include a variety of settings appropriate for your MPI
+      installation and thus avoid the guesswork of finding the right
+      flags.

-.. code-block:: bash
+      Parallel build (see ``src/MAKE/Makefile.mpi``):

-   CC =            mpicxx
-   CCFLAGS =       -g -O3
-   LINK =          mpicxx
-   LINKFLAGS =     -g -O
+      .. code-block:: bash

-Serial build with GNU gcc (see ``src/MAKE/Makefile.serial``):
+         CC =            mpicxx
+         CCFLAGS =       -g -O3
+         LINK =          mpicxx
+         LINKFLAGS =     -g -O

-.. code-block:: make
+      Serial build with GNU gcc (see ``src/MAKE/Makefile.serial``):

-   CC =            g++
-   CCFLAGS =       -g -O3
-   LINK =          g++
-   LINKFLAGS =     -g -O
+      .. code-block:: make

+         CC =            g++
+         CCFLAGS =       -g -O3
+         LINK =          g++
+         LINKFLAGS =     -g -O

-.. note::
+      .. note::

-   If compilation stops with a message like the following:
+         If compilation stops with a message like the following:

-   .. code-block::
+         .. code-block::

-      g++ -g -O3  -DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64    -I../STUBS     -c ../main.cpp
-      In file included from ../pointers.h:24:0,
-                 from ../input.h:17,
-                 from ../main.cpp:16:
-      ../lmptype.h:34:2: error: #error LAMMPS requires a C++11 (or later) compliant compiler. Enable C++11 compatibility or upgrade the compiler.
+            g++ -g -O3  -DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64    -I../STUBS     -c ../main.cpp
+            In file included from ../pointers.h:24:0,
+                       from ../input.h:17,
+                       from ../main.cpp:16:
+            ../lmptype.h:34:2: error: #error LAMMPS requires a C++11 (or later) compliant compiler. Enable C++11 compatibility or upgrade the compiler.

-   then you have either an unsupported (old) compiler or you have to
-   turn on C++11 mode.  The latter applies to GCC 4.8.x shipped with
-   RHEL 7.x and CentOS 7.x.  For those compilers, you need to add the
-   ``-std=c++11`` flag.  Otherwise, you would have to install a newer
-   compiler that supports C++11; either as a binary package or through
-   compiling from source.
+         then you have either an unsupported (old) compiler or you have
+         to turn on C++11 mode.  The latter applies to GCC 4.8.x shipped
+         with RHEL 7.x and CentOS 7.x.  For those compilers, you need to
+         add the ``-std=c++11`` flag.  Otherwise, you would have to
+         install a newer compiler that supports C++11; either as a
+         binary package or through compiling from source.

-If you build LAMMPS with any :doc:`Speed_packages` included, there may
-be specific compiler or linker flags
-that are either required or recommended to enable required features and
-to achieve optimal performance.  You need to include these in the
-CCFLAGS and LINKFLAGS settings above.  For details, see the individual
-package doc pages listed on the :doc:`Speed_packages`
-page.  Or examine these files in the src/MAKE/OPTIONS directory.
-They correspond to each of the 5 accelerator packages and their hardware
-variants:
+         If you build LAMMPS with any :doc:`Speed_packages` included,
+         there may be specific compiler or linker flags that are either
+         required or recommended to enable required features and to
+         achieve optimal performance.  You need to include these in the
+         CCFLAGS and LINKFLAGS settings above.  For details, see the
+         individual package doc pages listed on the
+         :doc:`Speed_packages` page.  Or examine these files in the
+         src/MAKE/OPTIONS directory.  They correspond to each of the 5
+         accelerator packages and their hardware variants:

-.. code-block:: bash
+         .. code-block:: bash

-   Makefile.opt                   # OPT package
-   Makefile.omp                   # USER-OMP package
-   Makefile.intel_cpu             # USER-INTEL package for CPUs
-   Makefile.intel_coprocessor     # USER-INTEL package for KNLs
-   Makefile.gpu                   # GPU package
-   Makefile.kokkos_cuda_mpi       # KOKKOS package for GPUs
-   Makefile.kokkos_omp            # KOKKOS package for CPUs (OpenMP)
-   Makefile.kokkos_phi            # KOKKOS package for KNLs (OpenMP)
+            Makefile.opt                   # OPT package
+            Makefile.omp                   # USER-OMP package
+            Makefile.intel_cpu             # USER-INTEL package for CPUs
+            Makefile.intel_coprocessor     # USER-INTEL package for KNLs
+            Makefile.gpu                   # GPU package
+            Makefile.kokkos_cuda_mpi       # KOKKOS package for GPUs
+            Makefile.kokkos_omp            # KOKKOS package for CPUs (OpenMP)
+            Makefile.kokkos_phi            # KOKKOS package for KNLs (OpenMP)

 ----------

@ -328,51 +343,56 @@ page for more info on coupling LAMMPS to other codes.  See the
 :doc:`Python <Python_head>` doc page for more info on wrapping and
 running LAMMPS from Python via its library interface.

-**CMake build**\ :
+.. tabs::

-For CMake builds, you can select through setting CMake variables between
-building a shared or a static LAMMPS library and what kind of suffix is
-added to them (in case you want to concurrently install multiple variants
-of binaries with different settings). If none are set, defaults are applied.
+   .. tab:: CMake build

-.. code-block:: bash
+      For CMake builds, you can select through setting CMake variables
+      between building a shared or a static LAMMPS library and what kind
+      of suffix is added to them (in case you want to concurrently
+      install multiple variants of binaries with different settings). If
+      none are set, defaults are applied.

-   -D BUILD_SHARED_LIBS=value   # yes or no (default)
-   -D LAMMPS_MACHINE=name       # name = mpi, serial, mybox, titan, laptop, etc
-                                # no default value
+      .. code-block:: bash

-The compilation will always produce a LAMMPS library and an executable
-linked to it.  By default this will be a static library named
-``liblammps.a`` and an executable named ``lmp`` Setting
-``BUILD_SHARED_LIBS=yes`` will instead produce a shared library called
-``liblammps.so`` (or ``liblammps.dylib`` or ``liblammps.dll`` depending
-on the platform) If ``LAMMPS_MACHINE=name`` is set in addition, the name
-of the generated libraries will be changed to either
-``liblammps_name.a`` or ``liblammps_name.so``\ , respectively and the
-executable will be called ``lmp_name``.
+         -D BUILD_SHARED_LIBS=value   # yes or no (default)
+         -D LAMMPS_MACHINE=name       # name = mpi, serial, mybox, titan, laptop, etc
+                                      # no default value

-**Traditional make**\ :
+      The compilation will always produce a LAMMPS library and an
+      executable linked to it.  By default this will be a static library
+      named ``liblammps.a`` and an executable named ``lmp``.  Setting
+      ``BUILD_SHARED_LIBS=yes`` will instead produce a shared library
+      called ``liblammps.so`` (or ``liblammps.dylib`` or
+      ``liblammps.dll`` depending on the platform).  If
+      ``LAMMPS_MACHINE=name`` is set in addition, the name of the
+      generated libraries will be changed to either ``liblammps_name.a``
+      or ``liblammps_name.so``\ , respectively and the executable will
+      be called ``lmp_name``.

-With the traditional makefile based build process, the choice of
-the generated executable or library depends on the "mode" setting.
-Several options are available and ``mode=static`` is the default.
+   .. tab:: Traditional make

-.. code-block:: bash
+      With the traditional makefile based build process, the choice of
+      the generated executable or library depends on the "mode" setting.
+      Several options are available and ``mode=static`` is the default.

-   make machine               # build LAMMPS executable lmp_machine
-   make mode=static machine   # same as "make machine"
-   make mode=shared machine   # build LAMMPS shared lib liblammps_machine.so instead
+      .. code-block:: bash

-The "static" build will generate a static library called
-``liblammps_machine.a`` and an executable named ``lmp_machine``\ , while
-the "shared" build will generate a shared library
-``liblammps_machine.so`` instead and ``lmp_machine`` will be linked to
-it.  The build step will also create generic soft links, named
-``liblammps.a`` and ``liblammps.so``\ , which point to the specific
-``liblammps_machine.a/so`` files.
+         make machine               # build LAMMPS executable lmp_machine
+         make mode=static machine   # same as "make machine"
+         make mode=shared machine   # build LAMMPS shared lib liblammps_machine.so instead

-CMake and make info
-^^^^^^^^^^^^^^^^^^^
+      The "static" build will generate a static library called
+      ``liblammps_machine.a`` and an executable named ``lmp_machine``\ ,
+      while the "shared" build will generate a shared library
+      ``liblammps_machine.so`` instead and ``lmp_machine`` will be
+      linked to it.  The build step will also create generic soft links,
+      named ``liblammps.a`` and ``liblammps.so``\ , which point to the
+      specific ``liblammps_machine.a/so`` files.
+
+
+Additional information
+^^^^^^^^^^^^^^^^^^^^^^

 Note that for creating a shared library, all the libraries it depends on
 must be compiled to be compatible with shared libraries.  This should be
@ -462,8 +482,8 @@ tool.  The actual translation is then done via make commands.
 .. _rst: https://docutils.readthedocs.io/en/sphinx-docs/user/rst/quickstart.html
 .. _sphinx: https://www.sphinx-doc.org

-Documentation make option
-^^^^^^^^^^^^^^^^^^^^^^^^^
+Documentation makefile options
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

 The following make commands can be issued in the doc folder of the
 LAMMPS source distribution.
@ -490,15 +510,16 @@ your system.
   current LAMMPS version (HTML and PDF files), from the website
   `download page <https://lammps.sandia.gov/download.html>`_.

-CMake build option
-^^^^^^^^^^^^^^^^^^
+CMake build options
+^^^^^^^^^^^^^^^^^^^

-It is also possible to create the HTML version of the manual within
-the :doc:`CMake build directory <Build_cmake>`.  The reason for this
-option is to include the installation of the HTML manual pages into
-the "install" step when installing LAMMPS after the CMake build via
-``make install``.  The documentation build is included in the default
-build target, but can also be requested independently with ``make doc``.
+It is also possible to create the HTML version (and only the HTML
+version) of the manual within the :doc:`CMake build directory
+<Build_cmake>`.  The reason for this option is to include the
+installation of the HTML manual pages into the "install" step when
+installing LAMMPS after the CMake build via ``make install``.  The
+documentation build is included in the default build target, but can
+also be requested independently with ``make doc``.

 .. code-block:: bash

@ -514,27 +535,27 @@ Build LAMMPS tools
 Some tools described in :doc:`Auxiliary tools <Tools>` can be built directly
 using CMake or Make.

-CMake build
-^^^^^^^^^^^
+.. tabs::

-.. code-block:: bash
+   .. tab:: CMake build

-   -D BUILD_TOOLS=value       # yes or no (default)
+      .. code-block:: bash

-The generated binaries will also become part of the LAMMPS installation
-(see below).
+         -D BUILD_TOOLS=value       # yes or no (default)

-Traditional make
-^^^^^^^^^^^^^^^^
+      The generated binaries will also become part of the LAMMPS installation
+      (see below).

-.. code-block:: bash
+   .. tab:: Traditional make

-   cd lammps/tools
-   make all              # build all binaries of tools
-   make binary2txt       # build only binary2txt tool
-   make chain            # build only chain tool
-   make micelle2d        # build only micelle2d tool
-   make thermo_extract   # build only thermo_extract tool
+      .. code-block:: bash
+
+         cd lammps/tools
+         make all              # build all binaries of tools
+         make binary2txt       # build only binary2txt tool
+         make chain            # build only chain tool
+         make micelle2d        # build only micelle2d tool
+         make thermo_extract   # build only thermo_extract tool

 ----------

@ -549,18 +570,19 @@ a globally visible place on your system, for others to access.  Note
 that you may need super-user privileges (e.g. sudo) if the directory
 you want to copy files to is protected.

-CMake build
-^^^^^^^^^^^
+.. tabs::

-.. code-block:: bash
+   .. tab:: CMake build

-   cmake -D CMAKE_INSTALL_PREFIX=path [options ...] ../cmake
-   make                        # perform make after CMake command
-   make install                # perform the installation into prefix
+      .. code-block:: bash

-Traditional make
-^^^^^^^^^^^^^^^^
+         cmake -D CMAKE_INSTALL_PREFIX=path [options ...] ../cmake
+         make                        # perform make after CMake command
+         make install                # perform the installation into prefix

-There is no "install" option in the ``src/Makefile`` for LAMMPS.  If
-you wish to do this you will need to first build LAMMPS, then manually
-copy the desired LAMMPS files to the appropriate system directories.
+   .. tab:: Traditional make
+
+      There is no "install" option in the ``src/Makefile`` for LAMMPS.
+      If you wish to do this you will need to first build LAMMPS, then
+      manually copy the desired LAMMPS files to the appropriate system
+      directories.
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
--- a/doc/src/Build_link.rst
+++ b/doc/src/Build_link.rst
@ -41,42 +41,45 @@ The benefit of linking to a static library is, that the resulting
 executable is independent of that library since all required
 executable code from the library is copied into the calling executable.

-CMake build
-^^^^^^^^^^^
+.. tabs::

-This assumes that LAMMPS has been configured without setting a
-``LAMMPS_MACHINE`` name, installed with "make install", and the
-``PKG_CONFIG_PATH`` environment variable has been updated to include the
-``liblammps.pc`` file installed into the configured destination folder.
-The commands to compile and link a coupled executable are then:
+   .. tab:: CMake build

-.. code-block:: bash
+      This assumes that LAMMPS has been configured without setting a
+      ``LAMMPS_MACHINE`` name, installed with "make install", and the
+      ``PKG_CONFIG_PATH`` environment variable has been updated to
+      include the ``liblammps.pc`` file installed into the configured
+      destination folder.  The commands to compile and link a coupled
+      executable are then:

-   mpicc -c -O $(pkgconf liblammps --cflags) caller.c
-   mpicxx -o caller caller.o -$(pkgconf liblammps --libs)
+      .. code-block:: bash

-Traditional make
-^^^^^^^^^^^^^^^^
+         mpicc -c -O $(pkgconf liblammps --cflags) caller.c
+         mpicxx -o caller caller.o $(pkgconf liblammps --libs)

-This assumes that LAMMPS has been compiled in the folder
-``${HOME}/lammps/src`` with "make mpi". The commands to compile and link
-a coupled executable are then:
+   .. tab:: Traditional make

-.. code-block:: bash
+      This assumes that LAMMPS has been compiled in the folder
+      ``${HOME}/lammps/src`` with "make mpi". The commands to compile
+      and link a coupled executable are then:

-   mpicc -c -O -I${HOME}/lammps/src caller.c
-   mpicxx -o caller caller.o -L${HOME}/lammps/src -llammps_mpi
+      .. code-block:: bash

-The *-I* argument is the path to the location of the ``library.h``
-header file containing the interface to the LAMMPS C-style library
-interface.  The *-L* argument is the path to where the ``liblammps_mpi.a``
-file is located.  The *-llammps_mpi* argument is shorthand for telling the
-compiler to link the file ``liblammps_mpi.a``.  If LAMMPS has been
-built as a shared library, then the linker will use ``liblammps_mpi.so``
-instead.  If both files are available, the linker will usually prefer
-the shared library.  In case of a shared library, you may need to update
-the ``LD_LIBRARY_PATH`` environment variable or running the ``caller``
-executable will fail since it cannot find the shared library at runtime.
+         mpicc -c -O -I${HOME}/lammps/src caller.c
+         mpicxx -o caller caller.o -L${HOME}/lammps/src -llammps_mpi
+
+      The *-I* argument is the path to the location of the ``library.h``
+      header file containing the interface to the LAMMPS C-style library
+      interface.  The *-L* argument is the path to where the
+      ``liblammps_mpi.a`` file is located.  The *-llammps_mpi* argument
+      is shorthand for telling the compiler to link the file
+      ``liblammps_mpi.a``.  If LAMMPS has been built as a shared
+      library, then the linker will use ``liblammps_mpi.so`` instead.
+      If both files are available, the linker will usually prefer the
+      shared library.  In case of a shared library, you may need to
+      update the ``LD_LIBRARY_PATH`` environment variable or running the
+      ``caller`` executable will fail since it cannot find the shared
+      library at runtime.

 However, it is only as simple as shown above for the case of a plain
 LAMMPS library without any optional packages that depend on libraries
@ -84,61 +87,62 @@ LAMMPS library without any optional packages that depend on libraries
 need to include all flags, libraries, and paths for the coupled
 executable, that are also required to link the LAMMPS executable.

-CMake build
-^^^^^^^^^^^
+.. tabs::

-When using CMake, additional libraries with sources in the lib folder
-are built, but not included in ``liblammps.a`` and (currently) not
-installed with ``make install`` and not included in the ``pkgconfig``
-configuration file.  They can be found in the top level build folder,
-but you have to determine the necessary link flags manually.  It is
-therefore recommended to either use the traditional make procedure to
-build and link with a static library or build and link with a shared
-library instead.
+   .. tab:: CMake build

-Traditional make
-^^^^^^^^^^^^^^^^
+      When using CMake, additional libraries with sources in the lib
+      folder are built, but not included in ``liblammps.a`` and
+      (currently) not installed with ``make install`` and not included
+      in the ``pkgconfig`` configuration file.  They can be found in the
+      top level build folder, but you have to determine the necessary
+      link flags manually.  It is therefore recommended to either use
+      the traditional make procedure to build and link with a static
+      library or build and link with a shared library instead.

-After you have compiled a static LAMMPS library using the conventional
-build system for example with "make mode=static serial". And you also
-have installed the ``POEMS`` package after building its bundled library
-in ``lib/poems``. Then the commands to build and link the coupled executable
-change to:
+   .. tab:: Traditional make

-.. code-block:: bash
+      After you have compiled a static LAMMPS library using the
+      conventional build system, for example with "make mode=static
+      serial", and have also installed the ``POEMS`` package after
+      building its bundled library in ``lib/poems``, the commands to
+      build and link the coupled executable change to:

-   gcc -c -O -I${HOME}/lammps/src/STUBS -I${HOME}/lammps/src -caller.c
-   g++ -o caller caller.o -L${HOME}/lammps/lib/poems \
-     -L${HOME}/lammps/src/STUBS -L${HOME}/lammps/src -llammps_serial -lpoems -lmpi_stubs
+      .. code-block:: bash

-Note, that you need to link with ``g++`` instead of ``gcc`` even if you have
-written your code in C, since LAMMPS itself is C++ code.  You can display the
-currently applied settings for building LAMMPS for the "serial" machine target
-by using the command:
+         gcc -c -O -I${HOME}/lammps/src/STUBS -I${HOME}/lammps/src caller.c
+         g++ -o caller caller.o -L${HOME}/lammps/lib/poems \
+                      -L${HOME}/lammps/src/STUBS -L${HOME}/lammps/src \
+                      -llammps_serial -lpoems -lmpi_stubs

-.. code-block:: bash
+      Note, that you need to link with ``g++`` instead of ``gcc`` even
+      if you have written your code in C, since LAMMPS itself is C++
+      code.  You can display the currently applied settings for building
+      LAMMPS for the "serial" machine target by using the command:

-   make mode=print serial
+      .. code-block:: bash

-Which should output something like:
+         make mode=print serial

-.. code-block:: bash
+      Which should output something like:

-   # Compiler:
-   CXX=g++
-   # Linker:
-   LD=g++
-   # Compilation:
-   CXXFLAGS=-g -O3 -DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64 -I${HOME}/compile/lammps/lib/poems -I${HOME}/compile/lammps/src/STUBS
-   # Linking:
-   LDFLAGS=-g -O
-   # Libraries:
-   LDLIBS=-L${HOME}/compile/lammps/src -llammps_serial -L${HOME}/compile/lammps/lib/poems -L${HOME}/compile/lammps/src/STUBS -lpoems -lmpi_stubs
+      .. code-block:: bash

-From this you can gather the necessary paths and flags.  With
-makefiles for other *machine* configurations you need to do the
-equivalent and replace "serial" with the corresponding "machine" name
-of the makefile.
+         # Compiler:
+         CXX=g++
+         # Linker:
+         LD=g++
+         # Compilation:
+         CXXFLAGS=-g -O3 -DLAMMPS_GZIP -DLAMMPS_MEMALIGN=64 -I${HOME}/compile/lammps/lib/poems -I${HOME}/compile/lammps/src/STUBS
+         # Linking:
+         LDFLAGS=-g -O
+         # Libraries:
+         LDLIBS=-L${HOME}/compile/lammps/src -llammps_serial -L${HOME}/compile/lammps/lib/poems -L${HOME}/compile/lammps/src/STUBS -lpoems -lmpi_stubs
+
+      From this you can gather the necessary paths and flags.  With
+      makefiles for other *machine* configurations you need to do the
+      equivalent and replace "serial" with the corresponding "machine"
+      name of the makefile.

 Link with LAMMPS as a shared library
 ------------------------------------
@ -151,35 +155,36 @@ linking the calling executable.  Only the *-I* flags are needed.  So the
 example case from above of the serial version static LAMMPS library with
 the POEMS package installed becomes:

-CMake build
-^^^^^^^^^^^
+.. tabs::

-The commands with a shared LAMMPS library compiled with the CMake
-build process are the same as for the static library.
+   .. tab:: CMake build

-.. code-block:: bash
+      The commands with a shared LAMMPS library compiled with the CMake
+      build process are the same as for the static library.

-   mpicc -c -O $(pkgconf liblammps --cflags) caller.c
-   mpicxx -o caller caller.o -$(pkgconf --libs)
+      .. code-block:: bash

-Traditional make
-^^^^^^^^^^^^^^^^
+         mpicc -c -O $(pkgconf liblammps --cflags) caller.c
+         mpicxx -o caller caller.o $(pkgconf liblammps --libs)

-The commands with a shared LAMMPS library compiled with the
-traditional make build using ``make mode=shared serial`` becomes:
+   .. tab:: Traditional make

-.. code-block:: bash
+      The commands with a shared LAMMPS library compiled with the
+      traditional make build using ``make mode=shared serial`` become:

-   gcc -c -O -I${HOME}/lammps/src/STUBS -I${HOME}/lammps/src -caller.c
-   g++ -o caller caller.o -L${HOME}/lammps/src -llammps_serial
+      .. code-block:: bash

-*Locating liblammps.so at runtime*\ :
+         gcc -c -O -I${HOME}/lammps/src/STUBS -I${HOME}/lammps/src caller.c
+         g++ -o caller caller.o -L${HOME}/lammps/src -llammps_serial

-However, now the ``liblammps.so`` file is required at runtime and needs
-to be in a folder, where the shared linker program of the operating
-system can find it.  This would be either a folder like ``/usr/local/lib64``
-or ``${HOME}/.local/lib64`` or a folder pointed to by the ``LD_LIBRARY_PATH``
-environment variable. You can type
+Locating liblammps.so at runtime
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Unlike with a static link, now the ``liblammps.so`` file is required at
+runtime and needs to be in a folder, where the shared linker program of
+the operating system can find it.  This would be either a folder like
+``/usr/local/lib64`` or ``${HOME}/.local/lib64`` or a folder pointed to
+by the ``LD_LIBRARY_PATH`` environment variable. You can type

 .. code-block:: bash

@ -187,9 +192,10 @@ environment variable. You can type

 to see what directories are in that list.

-Or you can add the LAMMPS src directory (or the directory you performed
-a CMake style build in) to your ``LD_LIBRARY_PATH``, so that the current
-version of the shared library is always available to programs that use it.
+Or you can add the LAMMPS src directory or the directory you performed a
+CMake style build in to your ``LD_LIBRARY_PATH`` environment variable,
+so that the current version of the shared library is always available to
+programs that use it.

 For the Bourne or Korn shells (/bin/sh, /bin/ksh, /bin/bash etc.), you
 would add something like this to your ``${HOME}/.profile`` file:
--- a/doc/src/Build_package.rst
+++ b/doc/src/Build_package.rst
@ -45,91 +45,92 @@ packages:
 The mechanism for including packages is simple but different for CMake
 versus make.

-CMake build
-^^^^^^^^^^^
+.. tabs::

-.. code-block:: csh
+   .. tab:: CMake build

-   -D PKG_NAME=value          # yes or no (default)
+      .. code-block:: csh

-Examples:
+         -D PKG_NAME=value          # yes or no (default)

-.. code-block:: csh
+      Examples:

-   -D PKG_MANYBODY=yes
-   -D PKG_USER-INTEL=yes
+      .. code-block:: csh

-All standard and user packages are included the same way.  Note that
-USER packages have a hyphen between USER and the rest of the package
-name, not an underscore.
+         -D PKG_MANYBODY=yes
+         -D PKG_USER-INTEL=yes

-See the shortcut section below for how to install many packages at
-once with CMake.
+      All standard and user packages are included the same way.  Note
+      that USER packages have a hyphen between USER and the rest of the
+      package name, not an underscore.
+
+      See the shortcut section below for how to install many packages at
+      once with CMake.
+
+      .. note::
+
+         If you switch between building with CMake and make builds, no
+         packages in the src directory can be installed when you invoke
+         ``cmake``.  CMake will give an error if that is not the case,
+         indicating how you can un-install all packages in the src dir.
+
+   .. tab:: Traditional make
+
+      .. code-block:: bash
+
+         cd lammps/src
+         make ps                    # check which packages are currently installed
+         make yes-name              # install a package with name
+         make no-name               # un-install a package with name
+         make mpi                   # build LAMMPS with whatever packages are now installed
+
+      Examples:
+
+      .. code-block:: bash
+
+         make no-rigid
+         make yes-user-intel
+
+      All standard and user packages are included the same way.
+
+      See the shortcut section below for how to install many packages at
+      once with make.
+
+      .. note::
+
+         You must always re-build LAMMPS (via make) after installing or
+         un-installing a package, for the action to take effect. The
+         included dependency tracking will make certain only files that
+         are required to be rebuilt are recompiled.
+
+      .. note::
+
+         You cannot install or un-install packages and build LAMMPS in a
+         single make command with multiple targets, e.g. ``make
+         yes-colloid mpi``.  This is because the make procedure creates
+         a list of source files that will be out-of-date for the build
+         if the package configuration changes within the same command.
+         You can include or exclude multiple packages in a single make
+         command, e.g. ``make yes-colloid no-manybody``.
+
+
+Information for both build systems
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Almost all packages can be included or excluded in a LAMMPS build,
+independent of the other packages.  However, some packages include files
+derived from files in other packages.  LAMMPS checks for this and does
+the right thing.  Individual files are only included if their
+dependencies are already included.  Likewise, if a package is excluded,
+other files dependent on that package are also excluded.

 .. note::

-   If you toggle back and forth between building with CMake vs
-   make, no packages in the src directory can be installed when you
-   invoke cmake.  CMake will give an error if that is not the case,
-   indicating how you can un-install all packages in the src dir.
-
-Traditional make
-^^^^^^^^^^^^^^^^
-
-.. code-block:: bash
-
-   cd lammps/src
-   make ps                    # check which packages are currently installed
-   make yes-name              # install a package with name
-   make no-name               # un-install a package with name
-   make mpi                   # build LAMMPS with whatever packages are now installed
-
-Examples:
-
-.. code-block:: bash
-
-   make no-rigid
-   make yes-user-intel
-
-All standard and user packages are included the same way.
-
-See the shortcut section below for how to install many packages at
-once with make.
-
-.. note::
-
-   You must always re-build LAMMPS (via make) after installing or
-   un-installing a package, for the action to take effect.
-
-.. note::
-
-   You cannot install or un-install packages and build LAMMPS in a
-   single make command with multiple targets, e.g. make yes-colloid mpi.
-   This is because the make procedure creates a list of source files that
-   will be out-of-date for the build if the package configuration changes
-   within the same command.  You can include or exclude multiple packages
-   in a single make command, e.g. make yes-colloid no-manybody.
-
-CMake and make info
-^^^^^^^^^^^^^^^^^^^
-
-Any package can be included or excluded in a LAMMPS build, independent
-of all other packages.  However, some packages include files derived
-from files in other packages.  LAMMPS checks for this and does the
-right thing.  Individual files are only included if their dependencies
-are already included.  Likewise, if a package is excluded, other files
-dependent on that package are also excluded.
-
-When you download a LAMMPS tarball or download LAMMPS source files
-from the git repository, no packages are pre-installed in the
-src directory.
-
-.. note::
-
-   Prior to Aug 2018, if you downloaded a tarball, 3 packages
-   (KSPACE, MANYBODY, MOLECULE) were pre-installed in the src directory.
-   That is no longer the case, so that CMake will build as-is without the
-   need to un-install those packages.
+   By default no package is installed.  Prior to August 2018, however,
+   if you downloaded a tarball, 3 packages (KSPACE, MANYBODY, MOLECULE)
+   were pre-installed via the traditional make procedure in the ``src``
+   directory.  That is no longer the case, so that CMake will build
+   as-is without needing to un-install those packages.

 ----------

--- a/doc/src/Build_settings.rst
+++ b/doc/src/Build_settings.rst
@ -44,74 +44,71 @@ require use of an FFT library to compute 1d FFTs.  The KISS FFT
 library is included with LAMMPS but other libraries can be faster.
 LAMMPS can use them if they are available on your system.

-CMake build
-^^^^^^^^^^^
+.. tabs::

-.. code-block:: bash
+   .. tab:: CMake build

-   -D FFT=value              # FFTW3 or MKL or KISS, default is FFTW3 if found, else KISS
-   -D FFT_SINGLE=value       # yes or no (default), no = double precision
-   -D FFT_PACK=value         # array (default) or pointer or memcpy
+      .. code-block:: bash

-.. note::
+         -D FFT=value              # FFTW3 or MKL or KISS, default is FFTW3 if found, else KISS
+         -D FFT_SINGLE=value       # yes or no (default), no = double precision
+         -D FFT_PACK=value         # array (default) or pointer or memcpy

-   The values for the FFT variable must be in upper-case.  This is
-   an exception to the rule that all CMake variables can be specified
-   with lower-case values.
+      .. note::

-Usually these settings are all that is needed.  If FFTW3 is selected,
-then CMake will try to detect, if threaded FFTW libraries are available
-and enable them by default.  This setting is independent of whether
-OpenMP threads are enabled and a packages like KOKKOS or USER-OMP is
-used.  If CMake cannot detect the FFT library, you can set these variables
-to assist:
+         The values for the FFT variable must be in upper-case.  This is
+         an exception to the rule that all CMake variables can be specified
+         with lower-case values.

-.. code-block:: bash
+      Usually these settings are all that is needed.  If FFTW3 is
+      selected, then CMake will try to detect whether threaded FFTW
+      libraries are available and enable them by default.  This setting
+      is independent of whether OpenMP threads are enabled and a
+      package like KOKKOS or USER-OMP is used.  If CMake cannot detect
+      the FFT library, you can set these variables to assist:

-   -D FFTW3_INCLUDE_DIRS=path  # path to FFTW3 include files
-   -D FFTW3_LIBRARIES=path     # path to FFTW3 libraries
-   -D FFT_FFTW_THREADS=on      # enable using threaded FFTW3 libraries
-   -D MKL_INCLUDE_DIRS=path    # ditto for Intel MKL library
-   -D FFT_MKL_THREADS=on       # enable using threaded FFTs with MKL libraries
-   -D MKL_LIBRARIES=path
+      .. code-block:: bash

-Traditional make
-^^^^^^^^^^^^^^^^
+         -D FFTW3_INCLUDE_DIR=path   # path to FFTW3 include files
+         -D FFTW3_LIBRARY=path       # path to FFTW3 libraries
+         -D FFT_FFTW_THREADS=on      # enable using threaded FFTW3 libraries
+         -D MKL_INCLUDE_DIR=path     # ditto for Intel MKL library
+         -D FFT_MKL_THREADS=on       # enable using threaded FFTs with MKL libraries
+         -D MKL_LIBRARY=path         # path to MKL libraries

-To change the FFT library to be used and its options, you have to edit
-your machine Makefile. Below are examples how the makefile variables
-could be changed.
+   .. tab:: Traditional make

-.. code-block:: make
+      To change the FFT library to be used and its options, you have to edit
+      your machine Makefile. Below are examples of how the makefile variables
+      could be changed.

-   FFT_INC = -DFFT_FFTW3         # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISS
-                                 # default is KISS if not specified
-   FFT_INC = -DFFT_SINGLE        # do not specify for double precision
-   FFT_INC = -DFFT_FFTW_THREADS  # enable using threaded FFTW3 libraries
-   FFT_INC = -DFFT_MKL_THREADS   # enable using threaded FFTs with MKL libraries
-   FFT_INC = -DFFT_PACK_ARRAY    # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY
+      .. code-block:: make

-# default is FFT_PACK_ARRAY if not specified
+         FFT_INC = -DFFT_FFTW3         # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISS
+                                       # default is KISS if not specified
+         FFT_INC = -DFFT_SINGLE        # do not specify for double precision
+         FFT_INC = -DFFT_FFTW_THREADS  # enable using threaded FFTW3 libraries
+         FFT_INC = -DFFT_MKL_THREADS   # enable using threaded FFTs with MKL libraries
+         FFT_INC = -DFFT_PACK_ARRAY    # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY
+                                       # default is FFT_PACK_ARRAY if not specified

-.. code-block:: make
+      .. code-block:: make

-   FFT_INC =       -I/usr/local/include
-   FFT_PATH =      -L/usr/local/lib
-   FFT_LIB =       -lfftw3             # FFTW3 double precision
-   FFT_LIB =       -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS)
-   FFT_LIB =       -lfftw3 -lfftw3f    # FFTW3 single precision
-   FFT_LIB =       -lmkl_intel_lp64 -lmkl_sequential -lmkl_core   # MKL with Intel compiler, serial interface
-   FFT_LIB =       -lmkl_gf_lp64 -lmkl_sequential -lmkl_core      # MKL with GNU compiler, serial interface
-   FFT_LIB =       -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core # MKL with Intel compiler, threaded interface
-   FFT_LIB =       -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core      # MKL with GNU compiler, threaded interface
-   FFT_LIB =       -lmkl_rt            # MKL with automatic runtime selection of interface libs
+         FFT_INC =       -I/usr/local/include
+         FFT_PATH =      -L/usr/local/lib
+         FFT_LIB =       -lfftw3             # FFTW3 double precision
+         FFT_LIB =       -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS)
+         FFT_LIB =       -lfftw3 -lfftw3f    # FFTW3 single precision
+         FFT_LIB =       -lmkl_intel_lp64 -lmkl_sequential -lmkl_core   # MKL with Intel compiler, serial interface
+         FFT_LIB =       -lmkl_gf_lp64 -lmkl_sequential -lmkl_core      # MKL with GNU compiler, serial interface
+         FFT_LIB =       -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core # MKL with Intel compiler, threaded interface
+         FFT_LIB =       -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core      # MKL with GNU compiler, threaded interface
+         FFT_LIB =       -lmkl_rt            # MKL with automatic runtime selection of interface libs

-As with CMake, you do not need to set paths in ``FFT_INC`` or ``FFT_PATH``, if
-the compiler can find the FFT header and library files in its default search path.
-You must specify ``FFT_LIB`` with the appropriate FFT libraries to include in the link.
-
-CMake build
-^^^^^^^^^^^
+      As with CMake, you do not need to set paths in ``FFT_INC`` or
+      ``FFT_PATH``, if the compiler can find the FFT header and library
+      files in its default search path.  You must specify ``FFT_LIB``
+      with the appropriate FFT libraries to include in the link.

 The `KISS FFT library <http://kissfft.sf.net>`_ is included in the LAMMPS
 distribution.  It is portable across all platforms.  Depending on the size
@ -177,76 +174,104 @@ ARRAY mode.

 .. _size:

-Size of LAMMPS integer types
------------------------------------
+Size of LAMMPS integer types and size limits
+--------------------------------------------

 LAMMPS has a few integer data types which can be defined as either
 4-byte (= 32-bit) or 8-byte (= 64-bit) integers at compile time.
+This has an impact on the size of a system that can be simulated
+or how large counters can become before "rolling over".
 The default setting of "smallbig" is almost always adequate.

-CMake build
-^^^^^^^^^^^
+.. tabs::

-.. code-block:: bash
+   .. tab:: CMake build

-   -D LAMMPS_SIZES=value   # smallbig (default) or bigbig or smallsmall
+      With CMake the choice of integer types is made via setting a
+      variable during configuration.

-Traditional build
-^^^^^^^^^^^^^^^^^
+      .. code-block:: bash

-If you want a setting different from the default, you need to edit your
-machine Makefile.
+         -D LAMMPS_SIZES=value   # smallbig (default) or bigbig or smallsmall

-.. code-block:: make
+      If the variable is not set explicitly, "smallbig" is used.

-   LMP_INC = -DLAMMPS_SMALLBIG    # or -DLAMMPS_BIGBIG or -DLAMMPS_SMALLSMALL
+   .. tab:: Traditional build

-The default setting is ``-DLAMMPS_SMALLBIG`` if nothing is specified
+      If you want a setting different from the default, you need to edit the
+      ``LMP_INC`` variable setting in your machine Makefile.

-CMake and make info
-^^^^^^^^^^^^^^^^^^^
+      .. code-block:: make

-The default "smallbig" setting allows for simulations with:
+         LMP_INC = -DLAMMPS_SMALLBIG    # or -DLAMMPS_BIGBIG or -DLAMMPS_SMALLSMALL

-* total atom count = 2\^63 atoms (about 9e18)
-* total timesteps = 2\^63 (about 9e18)
-* atom IDs = 2\^31 (about 2 billion)
-* image flags = roll over at 512
+      The default setting is ``-DLAMMPS_SMALLBIG`` if nothing is specified.

-The "bigbig" setting increases the latter two limits.  It allows for:
+LAMMPS system size restrictions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

-* total atom count = 2\^63 atoms (about 9e18)
-* total timesteps = 2\^63 (about 9e18)
-* atom IDs = 2\^63 (about 9e18)
-* image flags = roll over at about 1 million (2\^20)
+.. list-table::
+   :header-rows: 1
+   :widths: auto
+   :align: center

-The "smallsmall" setting is only needed if your machine does not
-support 8-byte integers.  It allows for:
+   * -
+     - smallbig
+     - bigbig
+     - smallsmall
+   * - Total atom count
+     - :math:`2^{63}` atoms (= :math:`9.223 \cdot 10^{18}`)
+     - :math:`2^{63}` atoms (= :math:`9.223 \cdot 10^{18}`)
+     - :math:`2^{31}` atoms (= :math:`2.147 \cdot 10^9`)
+   * - Total timesteps
+     - :math:`2^{63}` steps (= :math:`9.223 \cdot 10^{18}`)
+     - :math:`2^{63}` steps (= :math:`9.223 \cdot 10^{18}`)
+     - :math:`2^{31}` steps (= :math:`2.147 \cdot 10^9`)
+   * - Atom ID values
+     - :math:`1 \le i \le 2^{31} (= 2.147 \cdot 10^9)`
+     - :math:`1 \le i \le 2^{63} (= 9.223 \cdot 10^{18})`
+     - :math:`1 \le i \le 2^{31} (= 2.147 \cdot 10^9)`
+   * - Image flag values
+     - :math:`-512 \le i \le 511`
+     - :math:`- 1\,048\,576 \le i \le 1\,048\,575`
+     - :math:`-512 \le i \le 511`

-* total atom count = 2\^31 atoms (about 2 billion)
-* total timesteps = 2\^31 (about 2 billion)
-* atom IDs = 2\^31 (about 2 billion)
-* image flags = roll over at 512 (2\^9)
+The "bigbig" setting increases the size of image flags and atom IDs over
+"smallbig" and the "smallsmall" setting is only needed if your machine
+does not support 64-bit integers or incurs performance penalties when
+using them.
+
+These are limits for the core of the LAMMPS code; specific features or
+some styles may impose additional limits.  The :ref:`USER-ATC
+<PKG-USER-ATC>` package cannot be compiled with the "bigbig" setting.
+Also, there are limitations when using the library interface where some
+functions with known issues have been replaced by dummy calls printing a
+corresponding error message rather than crashing randomly or corrupting
+data.

 Atom IDs are not required for atomic systems which do not store bond
 topology information, though IDs are enabled by default.  The
 :doc:`atom_modify id no <atom_modify>` command will turn them off.  Atom
 IDs are required for molecular systems with bond topology (bonds,
-angles, dihedrals, etc).  Thus if you model a molecular system with
-more than 2 billion atoms, you need the "bigbig" setting.
+angles, dihedrals, etc).  Similarly, some force or compute or fix styles
+require atom IDs.  Thus if you model a molecular system or use one of
+those styles with more than 2 billion atoms, you need the "bigbig"
+setting.

-Image flags store 3 values per atom which count the number of times an
-atom has moved through the periodic box in each dimension.  See the
-:doc:`dump <dump>` doc page for a discussion.  If an atom moves through
-the periodic box more than this limit, the value will "roll over",
-e.g. from 511 to -512, which can cause diagnostics like the
-mean-squared displacement, as calculated by the :doc:`compute msd <compute_msd>` command, to be faulty.
+Regardless of the total system size limits, the maximum number of atoms
+per MPI rank (local + ghost atoms) is limited to 2 billion for atomic
+systems and 500 million for systems with bonds (the additional
+restriction is due to using the 2 upper bits of the local atom index
+in neighbor lists for storing special bonds info).
+
+Image flags store 3 values per atom in a single integer which count the
+number of times an atom has moved through the periodic box in each
+dimension.  See the :doc:`dump <dump>` doc page for a discussion.  If an
+atom moves through the periodic box more than this limit, the value will
+"roll over", e.g. from 511 to -512, which can cause diagnostics like the
+mean-squared displacement, as calculated by the :doc:`compute msd
+<compute_msd>` command, to be faulty.

-Note that the USER-ATC package and the USER-INTEL package are currently
-not compatible with the "bigbig" setting. Also, there are limitations
-when using the library interface. Some functions with known issues
-have been replaced by dummy calls printing a corresponding error rather
-than crashing randomly or corrupting data.

 Also note that the GPU package requires its lib/gpu library to be
 compiled with the same size setting, or the link will fail.  A CMake
@ -265,54 +290,51 @@ PNG image files.  Likewise the :doc:`dump movie <dump_image>` command
 outputs movie files in MPEG format.  Using these options requires the
 following settings:

-CMake build
-^^^^^^^^^^^
+.. tabs::

-.. code-block:: bash
+   .. tab:: CMake build

-   -D WITH_JPEG=value      # yes or no
-                           # default = yes if CMake finds JPEG files, else no
-   -D WITH_PNG=value       # yes or no
-                           # default = yes if CMake finds PNG and ZLIB files, else no
-   -D WITH_FFMPEG=value    # yes or no
-                           # default = yes if CMake can find ffmpeg, else no
+      .. code-block:: bash

-Usually these settings are all that is needed.  If CMake cannot find
-the graphics header, library, executable files, you can set these
-variables:
+         -D WITH_JPEG=value      # yes or no
+                                 # default = yes if CMake finds JPEG files, else no
+         -D WITH_PNG=value       # yes or no
+                                 # default = yes if CMake finds PNG and ZLIB files, else no
+         -D WITH_FFMPEG=value    # yes or no
+                                 # default = yes if CMake can find ffmpeg, else no

-.. code-block:: bash
+      Usually these settings are all that is needed.  If CMake cannot
+      find the graphics header, library, executable files, you can set
+      these variables:

-   -D JPEG_INCLUDE_DIR=path    # path to jpeglib.h header file
-   -D JPEG_LIBRARIES=path      # path to libjpeg.a (.so) file
-   -D PNG_INCLUDE_DIR=path     # path to png.h header file
-   -D PNG_LIBRARIES=path       # path to libpng.a (.so) file
-   -D ZLIB_INCLUDE_DIR=path    # path to zlib.h header file
-   -D ZLIB_LIBRARIES=path      # path to libz.a (.so) file
-   -D FFMPEG_EXECUTABLE=path   # path to ffmpeg executable
+      .. code-block:: bash

-Traditional make
-^^^^^^^^^^^^^^^^
+         -D JPEG_INCLUDE_DIR=path    # path to jpeglib.h header file
+         -D JPEG_LIBRARY=path        # path to libjpeg.a (.so) file
+         -D PNG_INCLUDE_DIR=path     # path to png.h header file
+         -D PNG_LIBRARY=path         # path to libpng.a (.so) file
+         -D ZLIB_INCLUDE_DIR=path    # path to zlib.h header file
+         -D ZLIB_LIBRARY=path        # path to libz.a (.so) file
+         -D FFMPEG_EXECUTABLE=path   # path to ffmpeg executable

-.. code-block:: make
+   .. tab:: Traditional make

-   LMP_INC = -DLAMMPS_JPEG
-   LMP_INC = -DLAMMPS_PNG
-   LMP_INC = -DLAMMPS_FFMPEG
+      .. code-block:: make

-   JPG_INC = -I/usr/local/include   # path to jpeglib.h, png.h, zlib.h header files if make cannot find them
-   JPG_PATH = -L/usr/lib            # paths to libjpeg.a, libpng.a, libz.a (.so) files if make cannot find them
-   JPG_LIB = -ljpeg -lpng -lz       # library names
+         LMP_INC = -DLAMMPS_JPEG
+         LMP_INC = -DLAMMPS_PNG
+         LMP_INC = -DLAMMPS_FFMPEG

-As with CMake, you do not need to set ``JPG_INC`` or ``JPG_PATH``,
-if make can find the graphics header and library files.  You must
-specify ``JPG_LIB``
-with a list of graphics libraries to include in the link.  You must
-insure ffmpeg is in a directory where LAMMPS can find it at runtime,
-that is a directory in your PATH environment variable.
+         JPG_INC = -I/usr/local/include   # path to jpeglib.h, png.h, zlib.h header files if make cannot find them
+         JPG_PATH = -L/usr/lib            # paths to libjpeg.a, libpng.a, libz.a (.so) files if make cannot find them
+         JPG_LIB = -ljpeg -lpng -lz       # library names

-CMake and make info
-^^^^^^^^^^^^^^^^^^^
+      As with CMake, you do not need to set ``JPG_INC`` or ``JPG_PATH``,
+      if make can find the graphics header and library files.  You must
+      specify ``JPG_LIB`` with a list of graphics libraries to include
+      in the link.  You must ensure ffmpeg is in a directory where
+      LAMMPS can find it at runtime, that is a directory in your PATH
+      environment variable.

 Using ``ffmpeg`` to output movie files requires that your machine
 supports the "popen" function in the standard runtime library.
@ -335,37 +357,34 @@ If this option is enabled, large files can be read or written with
 gzip compression by several LAMMPS commands, including
 :doc:`read_data <read_data>`, :doc:`rerun <rerun>`, and :doc:`dump <dump>`.

-CMake build
-^^^^^^^^^^^
+.. tabs::

-.. code-block:: bash
+   .. tab:: CMake build

-   -D WITH_GZIP=value       # yes or no
-                            # default is yes if CMake can find gzip, else no
-   -D GZIP_EXECUTABLE=path  # path to gzip executable if CMake cannot find it
+      .. code-block:: bash

-Traditional make
-^^^^^^^^^^^^^^^^
+         -D WITH_GZIP=value       # yes or no
+                                  # default is yes if CMake can find gzip, else no
+         -D GZIP_EXECUTABLE=path  # path to gzip executable if CMake cannot find it

-.. code-block:: make
+   .. tab:: Traditional make

-   LMP_INC = -DLAMMPS_GZIP
+      .. code-block:: make

-CMake and make info
-^^^^^^^^^^^^^^^^^^^
+         LMP_INC = -DLAMMPS_GZIP

-This option requires that your machine supports the "popen()" function
-in the standard runtime library and that a gzip executable can be
+This option requires that your operating system fully supports the "popen()"
+function in the standard runtime library and that a ``gzip`` executable can be
 found by LAMMPS during a run.

 .. note::

-   On some clusters with high-speed networks, using the fork()
-   library call (required by popen()) can interfere with the fast
-   communication library and lead to simulations using compressed output
-   or input to hang or crash. For selected operations, compressed file
-   I/O is also available using a compression library instead, which is
-   what the :ref:`COMPRESS package <PKG-COMPRESS>` enables.
+   On some clusters with high-speed networks, using the "fork()" library
+   call (required by "popen()") can interfere with the fast communication
+   library and lead to simulations using compressed output or input to
+   hang or crash. For selected operations, compressed file I/O is also
+   available using a compression library instead, which is what the
+   :ref:`COMPRESS package <PKG-COMPRESS>` enables.

 ----------

@ -374,65 +393,66 @@ found by LAMMPS during a run.
 Memory allocation alignment
 ---------------------------------------

-This setting enables the use of the posix_memalign() call instead of
-malloc() when LAMMPS allocates large chunks or memory.  This can make
-vector instructions on CPUs more efficient, if dynamically allocated
-memory is aligned on larger-than-default byte boundaries.
-On most current systems, the malloc() implementation returns
+This setting enables the use of the "posix_memalign()" call instead of
+"malloc()" when LAMMPS allocates large chunks of memory.  Vector
+instructions on CPUs may become more efficient, if dynamically allocated
+memory is aligned on larger-than-default byte boundaries.  On most
+current operating systems, the "malloc()" implementation returns
 pointers that are aligned to 16-byte boundaries. Using SSE vector
-instructions efficiently, however, requires memory blocks being
-aligned on 64-byte boundaries.
+instructions efficiently, however, requires memory blocks being aligned
+on 64-byte boundaries.

-CMake build
-^^^^^^^^^^^
+.. tabs::

-.. code-block:: bash
+   .. tab:: CMake build

-   -D LAMMPS_MEMALIGN=value            # 0, 8, 16, 32, 64 (default)
+      .. code-block:: bash

-Use a ``LAMMPS_MEMALIGN`` value of 0 to disable using posix_memalign()
-and revert to using the malloc() C-library function instead.  When
-compiling LAMMPS for Windows systems, malloc() will always be used
-and this setting ignored.
+         -D LAMMPS_MEMALIGN=value            # 0, 8, 16, 32, 64 (default)

-Traditional make
-^^^^^^^^^^^^^^^^
+      Use a ``LAMMPS_MEMALIGN`` value of 0 to disable using
+      "posix_memalign()" and revert to using the "malloc()" C-library
+      function instead.  When compiling LAMMPS for Windows systems,
+      "malloc()" will always be used and this setting is ignored.

-.. code-block:: make
+   .. tab:: Traditional make

-   LMP_INC = -DLAMMPS_MEMALIGN=value   # 8, 16, 32, 64
+      .. code-block:: make

-Do not set ``-DLAMMPS_MEMALIGN``, if you want to have memory allocated
-with the malloc() function call instead. ``-DLAMMPS_MEMALIGN`` **cannot**
-be used on Windows, as it does use different function calls for
-allocating aligned memory, that are not compatible with how LAMMPS
-manages its dynamical memory.
+         LMP_INC = -DLAMMPS_MEMALIGN=value   # 8, 16, 32, 64
+
+      Do not set ``-DLAMMPS_MEMALIGN``, if you want to have memory
+      allocated with the "malloc()" function call
+      instead. ``-DLAMMPS_MEMALIGN`` **cannot** be used on Windows, as
+      Windows uses different function calls with different semantics for
+      allocating aligned memory, that are not compatible with how LAMMPS
+      manages its dynamical memory.

 ----------

 .. _longlong:

 Workaround for long long integers
------------------------------------------------
+---------------------------------

 If your system or MPI version does not recognize "long long" data
 types, the following setting will be needed.  It converts "long long"
 to a "long" data type, which should be the desired 8-byte integer on
 those systems:

-CMake build
-^^^^^^^^^^^
+.. tabs::

-.. code-block:: bash
+   .. tab:: CMake build

-   -D LAMMPS_LONGLONG_TO_LONG=value     # yes or no (default)
+      .. code-block:: bash

-Traditional make
-^^^^^^^^^^^^^^^^
+         -D LAMMPS_LONGLONG_TO_LONG=value     # yes or no (default)

-.. code-block:: make
+   .. tab:: Traditional make

-   LMP_INC = -DLAMMPS_LONGLONG_TO_LONG
+      .. code-block:: make
+
+         LMP_INC = -DLAMMPS_LONGLONG_TO_LONG

 ----------

@ -447,19 +467,19 @@ Instead, the call stack is unwound and control returns to the caller,
 e.g. to Python. Of course, the calling code has to be set up to
 *catch* exceptions thrown from within LAMMPS.

-CMake build
-^^^^^^^^^^^
+.. tabs::

-.. code-block:: bash
+   .. tab:: CMake build

-   -D LAMMPS_EXCEPTIONS=value        # yes or no (default)
+      .. code-block:: bash

-Traditional make
-^^^^^^^^^^^^^^^^
+         -D LAMMPS_EXCEPTIONS=value        # yes or no (default)

-.. code-block:: make
+   .. tab:: Traditional make

-   LMP_INC = -DLAMMPS_EXCEPTIONS
+      .. code-block:: make
+
+         LMP_INC = -DLAMMPS_EXCEPTIONS

 .. note::

--- a/doc/src/Manual.rst
+++ b/doc/src/Manual.rst
@ -72,8 +72,6 @@ every LAMMPS command.
   pg_library
   Modify
   pg_developer
-..   pg_modify
-..   pg_base

 .. toctree::
   :caption: Index
--- a/doc/src/_ext/tab_or_note.py
+++ b/doc/src/_ext/tab_or_note.py
@ -0,0 +1,15 @@
+
+def replace_tabs_handler(app, docname, source):
+    """ When builder is not 'html', remove 'tabs' directive
+    and replace any 'tab' directive with 'admonition'"""
+    if app.builder.name != 'html':
+        for i in range(len(source)):
+            source[i] = source[i].replace('.. tabs::','').replace('.. tab::','.. admonition::')
+
+def setup(app):
+    app.connect('source-read', replace_tabs_handler)
+    return {
+        'version': '0.1',
+        'parallel_read_safe': True,
+        'parallel_write_safe': True,
+    }
--- a/doc/src/pg_atom.rst
+++ b/doc/src/pg_atom.rst
@ -0,0 +1,9 @@
+LAMMPS Atom and AtomVec Base Classes
+************************************
+
+.. doxygenclass:: LAMMPS_NS::Atom
+   :project: progguide
+   :members:
+
+
+
--- a/doc/src/pg_dev_classes.rst
+++ b/doc/src/pg_dev_classes.rst
@ -0,0 +1,38 @@
+LAMMPS C++ base classes
+=======================
+
+LAMMPS is designed to be used as a C++ class library where one can set
+up and drive a simulation through creating a class instance and then
+calling some abstract operations or commands on that class or its member
+class instances.  These are interfaced to the :doc:`C library API
+<pg_library>`, which provides an additional level of abstraction and
+simplification for common operations. The C API is also the basis for
+calling LAMMPS from Python or Fortran.
+
+When used from a C++ program, most of the symbols and functions in
+LAMMPS are wrapped into the ``LAMMPS_NS`` namespace so they will not
+collide with your own classes or other libraries. This, however, does
+not extend to the additional libraries bundled with LAMMPS in the lib
+folder and some of the low-level code of some packages.
+
+Behind the scenes this is implemented through inheritance and
+polymorphism where base classes define the abstract interface and
+derived classes provide the specialized implementation for specific
+models or optimizations or ports to accelerator platforms.  This
+document will provide an outline of the fundamental class hierarchy and
+some selected examples for derived classes of specific models.
+
+.. note::
+
+   Please see the :ref:`note about thread-safety <thread-safety>`
+   in the library Howto doc page.
+
+-----------------------------------
+
+.. toctree::
+   :caption: Individual Base Classes
+   :name: lammpsbase
+
+   pg_lammps
+   pg_atom
+   pg_input
--- a/doc/src/pg_dev_flow.rst
+++ b/doc/src/pg_dev_flow.rst
@ -0,0 +1,236 @@
+How a timestep works
+====================
+
+The first and most fundamental operation within LAMMPS to understand is
+how a timestep is structured.  Timestepping is performed by calling
+methods of the Integrate class instance within the Update class.  Since
+Integrate is a base class, it will point to an instance of a derived
+class corresponding to what is selected by the :doc:`run_style
+<run_style>` input script command.
+
+In this section, the timestep implemented by the Verlet class is
+described.  A similar timestep protocol is implemented by the Respa
+class, for the r-RESPA hierarchical timestepping method.
+
+The Min base class performs energy minimization, so does not perform a
+literal timestep.  But it has logic similar to what is described here,
+to compute forces and invoke fixes at each iteration of a minimization.
+Differences between time integration and minimization are highlighted at
+the end of this section.
+
+The Verlet class is encoded in the ``src/verlet.cpp`` and ``verlet.h``
+files.  It implements the velocity-Verlet timestepping algorithm.  The
+workhorse method is ``Verlet::run()``, but first we highlight several
+other methods in the class.
+
+- The ``init()`` method is called at the beginning of each dynamics
+  run.  It simply sets some internal flags, based on user settings in
+  other parts of the code.
+
+- The ``setup()`` or ``setup_minimal()`` methods are also called before
+  each run.  The velocity-Verlet method requires current forces be
+  calculated before the first timestep, so these routines compute
+  forces due to all atomic interactions, using the same logic that
+  appears in the timestepping described next.  A few fixes are also
+  invoked, using the mechanism described in the next section.  Various
+  counters are also initialized before the run begins.  The
+  ``setup_minimal()`` method is a variant that has a flag for performing
+  less setup.  This is used when runs are continued and information
+  from the previous run is still valid.  For example, if repeated
+  short LAMMPS runs are being invoked, interleaved by other commands,
+  via the *pre no* and *every* options of the run command, the
+  ``setup_minimal()`` method is used.
+
+- The ``force_clear()`` method initializes force and other arrays to
+  zero before each timestep, so that forces (torques, etc) can be
+  accumulated.
+
+Now for the ``Verlet::run()`` method.  Its basic structure in high-level
+pseudo-code is shown below.  In the actual code in ``src/verlet.cpp`` some of
+these operations are conditionally invoked.
+
+.. code-block:: python
+
+   loop over N timesteps:
+     if timeout condition: break
+     ev_set()
+
+     fix->initial_integrate()
+     fix->post_integrate()
+
+     nflag = neighbor->decide()
+     if nflag:
+       fix->pre_exchange()
+       domain->pbc()
+       domain->reset_box()
+       comm->setup()
+       neighbor->setup_bins()
+       comm->exchange()
+       comm->borders()
+       fix->pre_neighbor()
+       neighbor->build()
+       fix->post_neighbor()
+     else:
+       comm->forward_comm()
+
+     force_clear()
+     fix->pre_force()
+
+     pair->compute()
+     bond->compute()
+     angle->compute()
+     dihedral->compute()
+     improper->compute()
+     kspace->compute()
+
+     fix->pre_reverse()
+     comm->reverse_comm()
+
+     fix->post_force()
+     fix->final_integrate()
+     fix->end_of_step()
+
+     if any output on this step:
+       output->write()
+
+   # after loop
+   fix->post_run()
+
+
+The ``ev_set()`` method (in the parent Integrate class), sets two flags
+(*eflag* and *vflag*) for energy and virial computation.  Each flag
+encodes whether global and/or per-atom energy and virial should be
+calculated on this timestep, because some fix or variable or output will
+need it.  These flags are passed to the various methods that compute
+particle interactions, so that they either compute and tally the
+corresponding data or can skip the extra calculations if the energy and
+virial are not needed.  See the comments for the ``Integrate::ev_set()``
+method which document the flag values.
+
+At various points of the timestep, fixes are invoked,
+e.g. ``fix->initial_integrate()``.  In the code, this is actually done
+via the Modify class which stores all the Fix objects and lists of which
+should be invoked at what point in the timestep.  Fixes are the LAMMPS
+mechanism for tailoring the operations of a timestep for a particular
+simulation.  As described elsewhere, each fix has one or more methods,
+each of which is invoked at a specific stage of the timestep, as shown in
+the timestep pseudo-code.  All the active fixes defined in an input
+script, that are flagged to have an ``initial_integrate()`` method are
+invoked at the beginning of each timestep.  Examples are :doc:`fix nve
+<fix_nve>` or :doc:`fix nvt or fix npt <fix_nh>` which perform the
+start-of-timestep velocity-Verlet integration operations to update
+velocities by a half-step, and coordinates by a full step.  The
+``post_integrate()`` method is next for operations that need to happen
+immediately after those updates.  Only a few fixes use this, e.g. to
+reflect particles off box boundaries in the :doc:`FixWallReflect class
+<fix_wall_reflect>`.
+
+The ``decide()`` method in the Neighbor class determines whether
+neighbor lists need to be rebuilt on the current timestep (conditions
+can be changed using the :doc:`neigh_modify every/delay/check
+<neigh_modify>` command).  If not, coordinates of ghost atoms are
+acquired by each processor via the ``forward_comm()`` method of the Comm
+class.  If neighbor lists need to be built, several operations within
+the inner if clause of the pseudo-code are first invoked.  The
+``pre_exchange()`` method of any defined fixes is invoked first.
+Typically this inserts or deletes particles from the system.
+
+Periodic boundary conditions are then applied by the Domain class via
+its ``pbc()`` method to remap particles that have moved outside the
+simulation box back into the box.  Note that this is not done every
+timestep, but only when neighbor lists are rebuilt.  This is so that
+each processor's sub-domain will have consistent (nearby) atom
+coordinates for its owned and ghost atoms.  It is also why dumped atom
+coordinates may be slightly outside the simulation box if not dumped
+on a step where the neighbor lists are rebuilt.
+
+The box boundaries are then reset (if needed) via the ``reset_box()``
+method of the Domain class, e.g. if box boundaries are shrink-wrapped to
+current particle coordinates.  A change in the box size or shape
+requires internal information for communicating ghost atoms (Comm class)
+and neighbor list bins (Neighbor class) be updated.  The ``setup()``
+method of the Comm class and ``setup_bins()`` method of the Neighbor
+class perform the update.
+
+The code is now ready to migrate atoms that have left a processor's
+geometric sub-domain to new processors.  The ``exchange()`` method of
+the Comm class performs this operation.  The ``borders()`` method of the
+Comm class then identifies ghost atoms surrounding each processor's
+sub-domain and communicates ghost atom information to neighboring
+processors.  It does this by looping over all the atoms owned by a
+processor to make lists of those to send to each neighbor processor.  On
+subsequent timesteps, the lists are used by the ``Comm::forward_comm()``
+method.
+
+Fixes with a ``pre_neighbor()`` method are then called.  These typically
+re-build some data structure stored by the fix that depends on the
+current atoms owned by each processor.
+
+Now that each processor has a current list of its owned and ghost
+atoms, LAMMPS is ready to rebuild neighbor lists via the ``build()``
+method of the Neighbor class.  This is typically done by binning all
+owned and ghost atoms, and scanning a stencil of bins around each
+owned atom's bin to make a Verlet list of neighboring atoms within the
+force cutoff plus neighbor skin distance.
+
+In the next portion of the timestep, all interaction forces between
+particles are computed, after zeroing the per-atom force vector via the
+``force_clear()`` method.  If the newton flag is set to *on* by the
+newton command, forces are added to both owned and ghost atoms, otherwise
+only to owned (aka local) atoms.
+
+Pairwise forces are calculated first, which enables the global virial
+(if requested) to be calculated cheaply (at O(N) cost instead of O(N**2)
+at the end of the ``Pair::compute()`` method), by a dot product of atom
+coordinates and forces.  By including owned and ghost atoms in the dot
+product, the effect of periodic boundary conditions is correctly
+accounted for.  Molecular topology interactions (bonds, angles,
+dihedrals, impropers) are calculated next (if supported by the current
+atom style).  The final contribution is from long-range Coulombic
+interactions, invoked by the KSpace class.
+
+The ``pre_reverse()`` method in fixes is used for operations that have to
+be done *before* the upcoming reverse communication (e.g. to perform
+additional data transfers or reductions for data computed during the
+force computation and stored with ghost atoms).
+
+If the newton flag is on, forces on ghost atoms are communicated and
+summed back to their corresponding owned atoms.  The ``reverse_comm()``
+method of the Comm class performs this operation, which is essentially
+the inverse operation of sending copies of owned atom coordinates to
+other processors' ghost atoms.
+
+At this point in the timestep, the total force on each (local) atom is
+known.  Additional force constraints (external forces, SHAKE, etc) are
+applied by Fixes that have a ``post_force()`` method.  The second half
+of the velocity-Verlet integration, ``final_integrate()`` is then
+performed (another half-step update of the velocities) via fixes like
+nve, nvt, npt.
+
+At the end of the timestep, fixes that contain an ``end_of_step()``
+method are invoked.  These typically perform a diagnostic calculation,
+e.g. the ave/time and ave/spatial fixes.  The final operation of the
+timestep is to perform any requested output, via the ``write()`` method
+of the Output class.  There are 3 kinds of LAMMPS output: thermodynamic
+output to the screen and log file, snapshots of atom data to a dump
+file, and restart files.  See the :doc:`thermo_style <thermo_style>`,
+:doc:`dump <dump>`, and :doc:`restart <restart>` commands for more
+details.
+
+The flow of control during energy minimization iterations is
+similar to that of a molecular dynamics timestep.  Forces are computed,
+neighbor lists are built as needed, atoms migrate to new processors, and
+atom coordinates and forces are communicated to neighboring processors.
+The only difference is what Fix class operations are invoked when.  Only
+a subset of LAMMPS fixes are useful during energy minimization, as
+explained in their individual doc pages.  The relevant Fix class methods
+are ``min_pre_exchange()``, ``min_pre_force()``, and ``min_post_force()``.
+Each fix is invoked at the appropriate place within the minimization
+iteration.  For example, the ``min_post_force()`` method is analogous to
+the ``post_force()`` method for dynamics; it is used to alter or constrain
+forces on each atom, which affects the minimization procedure.
+
+After all iterations are completed there is a ``cleanup`` step which
+calls the ``post_run()`` method of fixes to perform operations only required
+at the end of a calculation (like freeing temporary storage or creating
+final outputs).
--- a/doc/src/pg_dev_org.rst
+++ b/doc/src/pg_dev_org.rst
@ -0,0 +1,250 @@
+LAMMPS source files
+===================
+
+The source files of the LAMMPS code are found in two
+directories of the distribution: ``src`` and ``lib``.
+Most of the code is C++ but there are small numbers of files
+in several other languages.
+
+The core of the code is located in the
+``src`` folder and its sub-directories.
+A sizable number of these files are in the ``src`` directory
+itself, but there are plenty of :doc:`packages <Packages>`, which can be
+included or excluded when LAMMPS is built.  See the :doc:`Include
+packages in build <Build_package>` section of the manual for more
+information about that part of the build process.  LAMMPS currently
+supports building with :doc:`conventional makefiles <Build_make>` and
+through :doc:`CMake <Build_cmake>` which differ in how packages are
+enabled or disabled for a LAMMPS binary.  The source files for each
+package are in all-uppercase sub-directories of the ``src`` folder, for
+example ``src/MOLECULE`` or ``src/USER-MISC``.  The ``src/STUBS``
+sub-directory is not a package but contains a dummy MPI library, that is
+used when building a serial version of the code. The ``src/MAKE``
+directory contains makefiles with settings and flags for a variety of
+configurations and machines for the build process with traditional
+makefiles.
+
+The ``lib`` directory contains the source code for several supporting
+libraries or files with configuration settings to use globally installed
+libraries, that are required by some of the optional packages.
+Each sub-directory, like ``lib/poems`` or ``lib/gpu``, contains the
+source files, some of which are in different languages such as Fortran
+or CUDA. These libraries are linked to during a LAMMPS build, if the
+corresponding package is installed.
+
+LAMMPS C++ source files almost always come in pairs, such as
+``src/run.cpp`` (implementation file) and ``src/run.h`` (header file).
+Each pair of files defines a C++
+class, for example the :cpp:class:`LAMMPS_NS::Run` class which contains
+the code invoked by the :doc:`run <run>` command in a LAMMPS input script.
+As this example illustrates, source file and class names often have a
+one-to-one correspondence with a command used in a LAMMPS input script.
+Some source files and classes do not have a corresponding input script
+command, e.g. ``src/force.cpp`` and the :cpp:class:`LAMMPS_NS::Force`
+class.  They are discussed in the next section.
+
+A small number of C++ classes and utility functions are implemented with
+only a ``.h`` file. Examples are the Pointer class or the MathVec functions.
+
+LAMMPS class topology
+=====================
+
+Though LAMMPS has a lot of source files and classes, its class topology
+is relatively flat, as outlined in the :ref:`class-topology` figure.  Each
+name refers to a class and has a pair of associated source files in the
+``src`` folder, for example the class :cpp:class:`LAMMPS_NS::Memory`
+corresponds to the files ``memory.cpp`` and ``memory.h``, or the class
+:cpp:class:`LAMMPS_NS::AtomVec` corresponds to the files
+``atom_vec.cpp`` and ``atom_vec.h``.  Full lines in the figure represent
+compositing: that is the class to the left holds a pointer to an
+instance of the class to the right.  Dashed lines instead represent
+inheritance: the class to the right is derived from the class on the
+left. Classes with a red boundary are not instantiated directly, but
+they represent the base classes for "styles".  Those "styles" make up
+the bulk of the LAMMPS code and only a few typical examples are included
+in the figure for demonstration purposes.
+
+.. _class-topology:
+.. figure:: JPG/lammps-classes.png
+
+   LAMMPS class topology
+
+   This figure shows some of the relations of the base classes of the
+   LAMMPS simulation package.  Full lines indicate that a class holds an
+   instance of the class it is pointing to; dashed lines point to
+   derived classes that are given as examples of what classes may be
+   instantiated during a LAMMPS run based on the input commands and
+   accessed through the API defined by their respective base classes.  At
+   the core is the :cpp:class:`LAMMPS <LAMMPS_NS::LAMMPS>` class, which
+   holds pointers to class instances with specific purposes.  Those may
+   hold instances of other classes, sometimes directly, or only
+   temporarily, sometimes as derived classes or derived classes of
+   derived classes, which may also hold instances of other classes.
+
+The :cpp:class:`LAMMPS_NS::LAMMPS` class is the topmost class and
+represents what is referred to as an "instance" of LAMMPS.  It is a
+composite holding references to instances of other core classes
+providing the core functionality of the MD engine in LAMMPS and through
+them abstractions of the required operations.  The constructor of the
+LAMMPS class will instantiate those instances, process the command line
+flags, initialize MPI (if not already done) and set up file pointers for
+input and output. The destructor will shut everything down and free all
+associated memory.  Thus code for the standalone LAMMPS executable in
+``main.cpp`` simply initializes MPI, instantiates a single instance of
+LAMMPS, and passes it the command line flags and input script. It
+deletes the LAMMPS instance after the method reading the input returns
+and shuts down the MPI environment before it exits the executable.
+
+The :cpp:class:`LAMMPS_NS::Pointers` class is not shown in the
+:ref:`class-topology` figure; it holds references to members of the
+:cpp:class:`LAMMPS_NS::LAMMPS` class, so that all classes derived from
+:cpp:class:`LAMMPS_NS::Pointers` have direct access to those references.
+From the class topology all classes with blue boundary are referenced in
+this class and all classes in the second and third columns, that are not
+listed as derived classes are instead derived from
+:cpp:class:`LAMMPS_NS::Pointers`.
+
+Since all storage is encapsulated, the LAMMPS class can also be
+instantiated multiple times by a calling code, and that can be either
+simultaneously or consecutively.  When running in parallel with MPI,
+care has to be taken, that suitable communicators are used to not
+create conflicts between different instances.
+
+The LAMMPS class currently holds instances of 19 classes representing
+different core functionalities.  There are a handful of virtual parent
+classes in LAMMPS that define what LAMMPS calls ``styles``.  They are
+shaded red in the :ref:`class-topology` figure.  Each of these are
+parents of a number of child classes that implement the interface
+defined by the parent class.  There are two main categories of these
+``styles``: some may only have one instance active at a time (e.g. atom,
+pair, bond, angle, dihedral, improper, kspace, comm) and there is a
+dedicated pointer variable in the composite class that manages them.
+Setups that require a mix of different such styles have to use a
+*hybrid* class that manages and forwards calls to the corresponding
+sub-styles for the designated subset of atoms or data.  Or the composite
+class may have lists of class instances, e.g. Modify handles lists of
+compute and fix styles, while Output handles dumps class instances.
+
+The exception to this scheme are the ``command`` style classes. These
+implement specific commands that can be invoked before, after, or between
+runs or are commands which launch a simulation.  For these an instance
+of the class is created, its command() method called and then, after
+completion, the class instance deleted.  Examples for this are the
+create_box, create_atoms, minimize, run, or velocity command styles.
+
+For all those ``styles`` certain naming conventions are employed: for
+the fix nve command the class is called FixNVE and the files are
+``fix_nve.h`` and ``fix_nve.cpp``. Similarly for fix ave/time we have
+FixAveTime and ``fix_ave_time.h`` and ``fix_ave_time.cpp``. Style names
+are lower case and without spaces or special characters. A suffix or
+multiple suffixes appended with a forward slash '/' denote a variant of the
+corresponding class without the suffix. To connect the style name and
+the class name, LAMMPS uses macros like the following ATOM\_CLASS,
+PAIR\_CLASS, BOND\_CLASS, REGION\_CLASS, FIX\_CLASS, COMPUTE\_CLASS,
+or DUMP\_CLASS in the corresponding header file.  During compilation
+files with the pattern ``style_name.h`` are created that contain include
+statements including all headers of all styles of a given type that
+are currently active (or "installed").
+
+
+More details on individual classes in the :ref:`class-topology` are as
+follows:
+
+- The Memory class handles allocation of all large vectors and arrays.
+
+- The Error class prints all error and warning messages.
+
+- The Universe class sets up partitions of processors so that multiple
+  simulations can be run, each on a subset of the processors allocated
+  for a run, e.g. by the mpirun command.
+
+- The Input class reads and processes input strings and files,
+  stores variables, and invokes :doc:`commands <Commands_all>`.
+
+- As discussed above, command style classes are directly derived from
+  the Pointers class. They provide input script commands that perform
+  one-time operations before/after/between simulations or which invoke a
+  simulation.  They are instantiated from within the Input class,
+  invoked, then immediately destructed.
+
+- The Finish class is instantiated to print statistics to the screen
+  after a simulation is performed, by commands like run and minimize.
+
+- The Special class walks the bond topology of a molecular system to
+  find first, second, third neighbors of each atom.  It is invoked by
+  several commands, like :doc:`read_data <read_data>`,
+  :doc:`read_restart <read_restart>`, or :doc:`replicate <replicate>`.
+
+- The Atom class stores per-atom properties associated with atom styles.
+  More precisely, they are allocated and managed by a class derived from
+  the AtomVec class, and the Atom class simply stores pointers to them.
+  The classes derived from AtomVec represent the different atom styles
+  and they are instantiated through the :doc:`atom_style <atom_style>`
+  command.
+
+- The Update class holds instances of an integrator and a minimizer
+  class.  The Integrate class is a parent style for the Verlet and
+  r-RESPA time integrators, as defined by the :doc:`run_style
+  <run_style>` command.  The Min class is a parent style for various
+  energy minimizers.
+
+- The Neighbor class builds and stores neighbor lists.  The NeighList
+  class stores a single list (for all atoms).  A NeighRequest class
+  instance is created by pair, fix, or compute styles when they need a
+  particular kind of neighbor list and use the NeighRequest properties
+  to select the neighbor list settings for the given request. There can
+  be multiple instances of the NeighRequest class and the Neighbor class
+  will try to optimize how they are computed by creating copies or
+  sub-lists where possible.
+
+- The Comm class performs inter-processor communication, typically of
+  ghost atom information.  This usually involves MPI message exchanges
+  with 6 neighboring processors in the 3d logical grid of processors
+  mapped to the simulation box. There are two :doc:`communication styles
+  <comm_style>` enabling different ways to do the domain decomposition.
+  Sometimes the Irregular class is used, when atoms may migrate to
+  arbitrary processors.
+
+- The Domain class stores the simulation box geometry, as well as
+  geometric Regions and any user definition of a Lattice.  The latter
+  are defined by the :doc:`region <region>` and :doc:`lattice <lattice>`
+  commands in an input script.
+
+- The Force class computes various forces between atoms.  The Pair
+  parent class is for non-bonded or pair-wise forces, which in LAMMPS
+  also includes many-body forces such as the Tersoff 3-body potential if
+  those are computed by walking pairwise neighbor lists.  The Bond,
+  Angle, Dihedral, Improper parent classes are styles for bonded
+  interactions within a static molecular topology.  The KSpace parent
+  class is for computing long-range Coulombic interactions.  One of its
+  child classes, PPPM, uses the FFT3D and Remap classes to redistribute
+  and communicate grid-based information across the parallel processors.
+
+- The Modify class stores lists of class instances derived from the
+  :doc:`Fix <fix>` and :doc:`Compute <compute>` base classes.
+
+- The Group class manipulates groups that atoms are assigned to via the
+  :doc:`group <group>` command.  It also has functions to compute
+  various attributes of groups of atoms.
+
+- The Output class is used to generate 3 kinds of output from a LAMMPS
+  simulation: thermodynamic information printed to the screen and log
+  file, dump file snapshots, and restart files.  These correspond to the
+  :doc:`Thermo <thermo_style>`, :doc:`Dump <dump>`, and
+  :doc:`WriteRestart <write_restart>` classes respectively.  The Dump
+  class is a base class with several derived classes implementing
+  various dump style variants.
+
+- The Timer class logs timing information, output at the end
+  of a run.
+
+.. TODO section on "Spatial decomposition and parallel operations"
+..       diagram of 3d processor grid, brick vs. tiled. local vs. ghost
+..       atoms, 6-way communication with pack/unpack functions,
+..       PBC as part of the communication
+
+.. TODO section on "Fixes, Computes, and Variables"
+..      how and when data is computed and provided and how it is
+..      referenced. flags in Fix/Compute/Variable classes tell
+..      style and amount of available data.
+
--- a/doc/src/pg_dev_utils.rst
+++ b/doc/src/pg_dev_utils.rst
@ -0,0 +1,417 @@
+
+LAMMPS utility functions
+========================
+
+The ``utils`` sub-namespace inside the ``LAMMPS_NS`` namespace provides
+a collection of convenience functions and utilities that perform common
+tasks that are required repeatedly throughout the LAMMPS code like
+reading or writing to files with error checking or translation of
+strings into specific types of numbers with checking for validity.  This
+reduces redundant implementations and encourages consistent behavior.
+
+I/O with status check
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+These are wrappers around the corresponding C library calls like
+``fgets()`` or ``fread()``.  They will check if there were errors
+on reading or an unexpected end-of-file state was reached.  In that
+case, the functions will stop the calculation with an error message,
+indicating the name of the problematic file, if possible.
+
+----------
+
+.. doxygenfunction:: sfgets
+   :project: progguide
+
+.. doxygenfunction:: sfread
+   :project: progguide
+
+String to number conversions with validity check
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+These functions should be used to convert strings to numbers. They
+are strongly preferred over C library calls like ``atoi()`` or
+``atof()`` since they check if the **entire** provided string is a valid
+(floating-point or integer) number, and will error out instead of
+silently returning the result of a partial conversion or zero in cases
+where the string is not a valid number.  This behavior makes it easier
+to detect typos or issues when processing input files.
+
+The *do_abort* flag should be set to ``true`` in case this function
+is called only on a single MPI rank, as that will then trigger
+a call to ``Error::one()`` for errors instead of ``Error::all()``
+and avoid a "hanging" calculation when run in parallel.
+
+Please also see :cpp:func:`is_integer` and :cpp:func:`is_double` for
+testing strings for compliance without conversion.
+
+----------
+
+.. doxygenfunction:: numeric
+   :project: progguide
+
+.. doxygenfunction:: inumeric
+   :project: progguide
+
+.. doxygenfunction:: bnumeric
+   :project: progguide
+
+.. doxygenfunction:: tnumeric
+   :project: progguide
+
+
+String processing
+^^^^^^^^^^^^^^^^^
+
+The following are functions to help with processing strings
+and parsing files or arguments.
+
+----------
+
+.. doxygenfunction:: trim
+   :project: progguide
+
+.. doxygenfunction:: trim_comment
+   :project: progguide
+
+.. doxygenfunction:: count_words(const char *text)
+   :project: progguide
+
+.. doxygenfunction:: count_words(const std::string &text)
+   :project: progguide
+
+.. doxygenfunction:: count_words(const std::string &text, const std::string &separators)
+   :project: progguide
+
+.. doxygenfunction:: trim_and_count_words
+   :project: progguide
+
+.. doxygenfunction:: split_words
+   :project: progguide
+
+.. doxygenfunction:: strmatch
+   :project: progguide
+
+.. doxygenfunction:: is_integer
+   :project: progguide
+
+.. doxygenfunction:: is_double
+   :project: progguide
+
+File and path functions
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: guesspath
+   :project: progguide
+
+.. doxygenfunction:: path_basename
+   :project: progguide
+
+.. doxygenfunction:: path_join
+   :project: progguide
+
+.. doxygenfunction:: file_is_readable
+   :project: progguide
+
+Potential file functions
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: get_potential_file_path
+   :project: progguide
+
+.. doxygenfunction:: get_potential_date
+   :project: progguide
+
+.. doxygenfunction:: get_potential_units
+   :project: progguide
+
+.. doxygenfunction:: get_supported_conversions
+   :project: progguide
+
+.. doxygenfunction:: get_conversion_factor
+   :project: progguide
+
+.. doxygenfunction:: open_potential(const std::string &name, LAMMPS *lmp, int *auto_convert)
+   :project: progguide
+
+Argument processing
+^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: bounds
+   :project: progguide
+
+.. doxygenfunction:: expand_args
+   :project: progguide
+
+Convenience functions
+^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: logmesg
+   :project: progguide
+
+.. doxygenfunction:: getsyserror
+   :project: progguide
+
+.. doxygenfunction:: check_packages_for_style
+   :project: progguide
+
+.. doxygenfunction:: timespec2seconds
+   :project: progguide
+
+.. doxygenfunction:: date2num
+   :project: progguide
+
+Customized standard functions
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. doxygenfunction:: merge_sort
+   :project: progguide
+
+---------------------------
+
+Tokenizer classes
+=================
+
+The purpose of the tokenizer classes is to simplify the recurring task
+of breaking lines of text down into words and/or numbers.
+Traditionally, LAMMPS code would be using the ``strtok()`` function from
+the C library for that purpose, but that function has two significant
+disadvantages: 1) it cannot be used concurrently from different LAMMPS
+instances since it stores its status in a global variable and 2) it
+modifies the string that it is processing.  These classes were
+implemented to avoid both of these issues and also to reduce the amount
+of code that needs to be written.
+
+The basic procedure is to create an instance of the tokenizer class with
+the string to be processed as an argument and then do a loop until all
+available tokens are read.  The constructor has a default set of
+separator characters, but that can be overridden. The default separators
+are all "whitespace" characters, i.e. the space character, the tabulator
+character, the carriage return character, the linefeed character, and
+the form feed character.
+
+.. code-block:: C++
+   :caption: Tokenizer class example listing entries of the PATH environment variable
+
+   #include "tokenizer.h"
+   #include <cstdlib>
+   #include <string>
+   #include <iostream>
+
+   using namespace LAMMPS_NS;
+
+   int main(int, char **)
+   {
+       const char *path = getenv("PATH");
+
+       if (path != nullptr) {
+           Tokenizer p(path,":");
+           while (p.has_next())
+               std::cout << "Entry: " << p.next() << "\n";
+       }
+       return 0;
+   }
+
+Most tokenizer operations cannot fail except for
+:cpp:func:`LAMMPS_NS::Tokenizer::next` (when used without first
+checking with :cpp:func:`LAMMPS_NS::Tokenizer::has_next`) and
+:cpp:func:`LAMMPS_NS::Tokenizer::skip`.  In case of failure, the class
+will throw an exception, so you may need to wrap the code using the
+tokenizer into a ``try`` / ``catch`` block to handle errors.  The
+:cpp:class:`LAMMPS_NS::ValueTokenizer` class may also throw an exception
+when a (type of) number is requested as next token that is not
+compatible with the string representing the next word.
+
+.. code-block:: C++
+   :caption: ValueTokenizer class example with exception handling
+
+   #include "tokenizer.h"
+   #include <cstdlib>
+   #include <string>
+   #include <iostream>
+
+   using namespace LAMMPS_NS;
+
+   int main(int, char **)
+   {
+       const char *text = "1 2 3 4 5 20.0 21 twentytwo 2.3";
+       double num1(0),num2(0),num3(0),num4(0);
+
+       ValueTokenizer t(text);
+       // read 4 doubles after skipping over 5 numbers
+       try {
+           t.skip(5);
+           num1 = t.next_double();
+           num2 = t.next_double();
+           num3 = t.next_double();
+           num4 = t.next_double();
+       } catch (TokenizerException &e) {
+           std::cout << "Reading numbers failed: " << e.what() << "\n";
+       }
+       std::cout << "Values: " << num1 << " " << num2 << " " << num3 << " " << num4 << "\n";
+       return 0;
+   }
+
+This code example should produce the following output:
+
+.. code-block::
+
+   Reading numbers failed: Not a valid floating-point number: 'twentytwo'
+   Values: 20 21 0 0
+
+----------
+
+.. doxygenclass:: LAMMPS_NS::Tokenizer
+   :project: progguide
+   :members:
+
+.. doxygenclass:: LAMMPS_NS::TokenizerException
+   :project: progguide
+   :members:
+
+.. doxygenclass:: LAMMPS_NS::ValueTokenizer
+   :project: progguide
+   :members:
+
+.. doxygenclass:: LAMMPS_NS::InvalidIntegerException
+   :project: progguide
+   :members: what
+
+.. doxygenclass:: LAMMPS_NS::InvalidFloatException
+   :project: progguide
+   :members: what
+
+File reader classes
+====================
+
+The purpose of the file reader classes is to simplify the recurring task
+of reading and parsing files. They can use the
+:cpp:class:`LAMMPS_NS::ValueTokenizer` class to process the read in
+text.  The :cpp:class:`LAMMPS_NS::TextFileReader` is a more general
+version while :cpp:class:`LAMMPS_NS::PotentialFileReader` is specialized
+to implement the behavior expected for looking up and reading/parsing
+files with potential parameters in LAMMPS.  The potential file reader
+class requires a LAMMPS instance, requires to be run on MPI rank 0 only,
+will use the :cpp:func:`LAMMPS_NS::utils::get_potential_file_path`
+function to look up and open the file, and will call the
+:cpp:class:`LAMMPS_NS::Error` class in case of failures to read or to
+convert numbers, so that LAMMPS will be aborted.
+
+.. code-block:: C++
+   :caption: Use of PotentialFileReader class in pair style coul/streitz
+
+    PotentialFileReader reader(lmp, file, "coul/streitz");
+    char * line;
+
+    while((line = reader.next_line(NPARAMS_PER_LINE))) {
+      try {
+        ValueTokenizer values(line);
+        std::string iname = values.next_string();
+
+        int ielement;
+        for (ielement = 0; ielement < nelements; ielement++)
+          if (iname == elements[ielement]) break;
+
+        if (nparams == maxparam) {
+          maxparam += DELTA;
+          params = (Param *) memory->srealloc(params,maxparam*sizeof(Param),
+                                              "pair:params");
+        }
+
+        params[nparams].ielement = ielement;
+        params[nparams].chi = values.next_double();
+        params[nparams].eta = values.next_double();
+        params[nparams].gamma = values.next_double();
+        params[nparams].zeta = values.next_double();
+        params[nparams].zcore = values.next_double();
+
+      } catch (TokenizerException & e) {
+        error->one(FLERR, e.what());
+      }
+      nparams++;
+    }
+
+A file that would be parsed by the reader code fragment looks like this::
+
+   # DATE: 2015-02-19 UNITS: metal CONTRIBUTOR: Ray Shan CITATION: Streitz and Mintmire, Phys Rev B, 50, 11996-12003 (1994)
+   #
+   # X (eV)                J (eV)          gamma (1/\AA)   zeta (1/\AA)    Z (e)
+
+   Al      0.000000        10.328655       0.000000        0.968438        0.763905
+   O       5.484763        14.035715       0.000000        2.143957        0.000000
+
+
+----------
+
+.. doxygenclass:: LAMMPS_NS::TextFileReader
+   :project: progguide
+   :members:
+
+.. doxygenclass:: LAMMPS_NS::PotentialFileReader
+   :project: progguide
+   :members:
+
+
+----------
+
+Memory pool classes
+===================
+
+The memory pool classes are used for cases where otherwise many
+small memory allocations would be needed and where the data would
+be either all used or all freed.  One example for that is the
+storage of neighbor lists.  The memory management strategy is
+based on the assumption that allocations will be in chunks of similar
+sizes.  The allocation is then not done per individual call for a
+reserved chunk of memory, but for a "page" that can hold multiple
+chunks of data.  A parameter for the maximum chunk size must be
+provided, as that is used to determine whether a new page of memory
+must be used.
+
+The :cpp:class:`MyPage <LAMMPS_NS::MyPage>` class offers two ways to
+reserve a chunk: 1) with :cpp:func:`get() <LAMMPS_NS::MyPage::get>` the
+chunk size needs to be known in advance, 2) with :cpp:func:`vget()
+<LAMMPS_NS::MyPage::vget>` a pointer to the next chunk is returned, but
+its size is registered later with :cpp:func:`vgot()
+<LAMMPS_NS::MyPage::vgot>`.
+
+.. code-block:: C++
+   :caption: Example of using :cpp:class:`MyPage <LAMMPS_NS::MyPage>`
+
+      #include "my_page.h"
+      using namespace LAMMPS_NS;
+
+      MyPage<double> *dpage = new MyPage<double>;
+      // max size of chunk: 256, size of page: 10240 doubles (=81920 bytes)
+      dpage->init(256,10240);
+
+      double **build_some_lists(int num)
+      {
+          dpage->reset();
+          double **dlist = new double*[num];
+          for (int i=0; i < num; ++i) {
+              double *dptr = dpage.vget();
+              int jnum = 0;
+              for (int j=0; j < jmax; ++j) {
+                  // compute some dvalue for eligible loop index j
+                  dptr[j] = dvalue;
+                  ++jnum;
+              }
+              if (dpage.status() != 0) {
+                  // handle out of memory or jnum too large errors
+              }
+              dpage.vgot(jnum);
+              dlist[i] = dptr;
+          }
+          return dlist;
+      }
+
+----------
+
+.. doxygenclass:: LAMMPS_NS::MyPage
+   :project: progguide
+   :members:
+
+.. doxygenclass:: LAMMPS_NS::MyPoolChunk
+   :project: progguide
+   :members:
--- a/doc/src/pg_dev_write.rst
+++ b/doc/src/pg_dev_write.rst
@ -0,0 +1,253 @@
+Writing LAMMPS styles
+=====================
+
+The :doc:`Modify` section of the manual gives an overview of how LAMMPS can
+be extended by writing new classes that derive from existing
+parent classes in LAMMPS.  Here, some specific coding
+details are provided for writing code for LAMMPS.
+
+Writing a new fix style
+^^^^^^^^^^^^^^^^^^^^^^^
+
+Writing fixes is a flexible way of extending LAMMPS.  Users can
+implement many things using fixes:
+
+- changing particle attributes (positions, velocities, forces, etc.). Examples: FixNVE, FixFreeze.
+- reading/writing data. Example: FixRestart.
+- adding or modifying properties due to geometry. Example: FixWall.
+- interacting with other subsystems or external code. Examples: FixTTM, FixExternal, FixLATTE.
+- saving information for analysis or future use (previous positions,
+  for instance). Examples: FixAveTime, FixStoreState.
+
+
+All fixes are derived from the Fix base class and must have a
+constructor with the signature: ``FixPrintVel(class LAMMPS *, int, char **)``.
+
+Every fix must be registered in LAMMPS by writing the following lines
+of code in the header before include guards:
+
+.. code-block:: c
+
+   #ifdef FIX_CLASS
+   FixStyle(print/vel,FixPrintVel)
+   #else
+   /* the definition of the FixPrintVel class comes here */
+   ...
+   #endif
+
+Where ``print/vel`` is the style name of your fix in the input script and
+``FixPrintVel`` is the name of the class. The header file would be called
+``fix_print_vel.h`` and the implementation file ``fix_print_vel.cpp``.
+These conventions allow LAMMPS to automatically integrate it into the
+executable when compiling and associate your new fix class with the designated
+keyword when it parses the input script.
+
+Let's write a simple fix which will print the average velocity at the end
+of each timestep. First of all, implement a constructor:
+
+.. code-block:: C++
+
+   FixPrintVel::FixPrintVel(LAMMPS *lmp, int narg, char **arg)
+   : Fix(lmp, narg, arg)
+   {
+     if (narg < 4)
+       error->all(FLERR,"Illegal fix print/vel command");
+
+     nevery = force->inumeric(FLERR,arg[3]);
+     if (nevery <= 0)
+       error->all(FLERR,"Illegal fix print/vel command");
+   }
+
+In the constructor you should parse your fix arguments which are
+specified in the script. All fixes have pretty much the same syntax:
+``fix <fix-ID> <fix group> <fix name> <fix arguments ...>``. The
+first 3 parameters are parsed by Fix base class constructor, while
+``<fix arguments>`` should be parsed by you. In our case, we need to
+specify how often we want to print an average velocity. For instance,
+once in 50 timesteps: ``fix 1 all print/vel 50``. There is a special variable
+in the Fix class called ``nevery`` which specifies how often the method
+``end_of_step()`` is called. Thus all we need to do is just set it up.
+
+The next method we need to implement is ``setmask()``:
+
+.. code-block:: C++
+
+   int FixPrintVel::setmask()
+   {
+     int mask = 0;
+     mask |= FixConst::END_OF_STEP;
+     return mask;
+   }
+
+Here the user specifies which methods of your fix should be called
+during execution. The constant ``END_OF_STEP`` corresponds to the
+``end_of_step()`` method. The most important available methods that
+are called during a timestep and the order in which they are called
+are shown in the previous section.
+
+.. code-block:: C++
+
+   void FixPrintVel::end_of_step()
+   {
+     // for add3, scale3
+     using namespace MathExtra;
+
+     double** v = atom->v;
+     int nlocal = atom->nlocal;
+     double localAvgVel[4]; // 4th element for particles count
+     memset(localAvgVel, 0, 4 * sizeof(double));
+     for (int particleInd = 0; particleInd < nlocal; ++particleInd) {
+       add3(localAvgVel, v[particleInd], localAvgVel);
+     }
+     localAvgVel[3] = nlocal;
+     double globalAvgVel[4];
+     memset(globalAvgVel, 0, 4 * sizeof(double));
+     MPI_Allreduce(localAvgVel, globalAvgVel, 4, MPI_DOUBLE, MPI_SUM, world);
+     scale3(1.0 / globalAvgVel[3], globalAvgVel);
+     if ((comm->me == 0) && screen) {
+       fmt::print(screen,"{}, {}, {}\n",
+                  globalAvgVel[0], globalAvgVel[1], globalAvgVel[2]);
+     }
+   }
+
+In the code above, we use MathExtra routines defined in
+``math_extra.h``.  There are a bunch of math functions to work with
+arrays of doubles as with math vectors.  It is also important to note
+that LAMMPS code should always assume to be run in parallel and that
+atom data is thus distributed across the MPI ranks.  Thus you can
+only process data from local atoms directly and need to use MPI library
+calls to combine or exchange data.  For serial execution, LAMMPS
+comes bundled with the MPI STUBS library that contains the MPI library
+function calls in dummy versions that only work for a single MPI rank.
+
+In this code we use an instance of Atom class. This object is stored
+in the Pointers class (see ``pointers.h``) which is the base class of
+the Fix base class. This object contains references to various class
+instances (the original instances are created and held by the LAMMPS
+class) with all global information about the simulation system.
+Data from the Pointers class is available to all classes inherited from
+it using protected inheritance. Hence when you write your own class,
+which is going to use LAMMPS data, don't forget to inherit from Pointers
+or pass a pointer to it to all functions that need access. When writing
+fixes we inherit from class Fix which is inherited from Pointers so
+there is no need to inherit from it directly.
+
+The code above computes average velocity for all particles in the
+simulation.  Yet you have one unused parameter in fix call from the
+script: ``group_name``.  This parameter specifies the group of atoms
+used in the fix. So we should compute average for all particles in the
+simulation only if ``group_name == "all"``, but it can be any group.
+The group membership information of an atom is contained in the *mask*
+property of an atom and the bit corresponding to a given group is
+stored in the groupbit variable which is defined in Fix base class:
+
+.. code-block:: C++
+
+   for (int i = 0; i < nlocal; ++i) {
+     if (atom->mask[i] & groupbit) {
+     // Do all job here
+     }
+   }
+
+Class Atom encapsulates atom positions, velocities, forces, etc. Users
+can access them using the particle index. Note that particle indexes are
+usually changed every few timesteps because of neighbor list rebuilds
+and spatial sorting (to improve cache efficiency).
+
+Let us consider another Fix example: We want to have a fix which stores
+the atom positions from the previous time step in your fix. The local atom
+indexes may not be valid on the next iteration. In order to handle
+this situation there are several methods which should be implemented:
+
+- ``double memory_usage()``: return how much memory the fix uses (optional)
+- ``void grow_arrays(int)``: do reallocation of the per particle arrays in your fix
+- ``void copy_arrays(int i, int j, int delflag)``: copy i-th per-particle
+  information to j-th. Used when atom sorting is performed. if delflag is set
+  and atom j owns a body, move the body information to atom i.
+- ``void set_arrays(int i)``: sets i-th particle related information to zero
+
+Note that if your class implements these methods, it must add calls of
+add_callback and delete_callback to the constructor and destructor. Since we
+want to store positions of atoms from the previous timestep, we need to add
+``double** xold`` to the header file. Then add allocation code
+to the constructor:
+
+.. code-block:: C++
+
+   FixSavePos::FixSavePos(LAMMPS *lmp, int narg, char **arg), xold(nullptr)
+   {
+   //...
+     memory->create(xold, atom->nmax, 3, "FixSavePos:x");
+     atom->add_callback(0);
+   }
+
+   FixSavePos::~FixSavePos() {
+     atom->delete_callback(id, 0);
+     memory->destroy(xold);
+   }
+
+Implement the aforementioned methods:
+
+.. code-block:: C++
+
+   double FixSavePos::memory_usage()
+   {
+     int nmax = atom->nmax;
+     double bytes = 0.0;
+     bytes += nmax * 3 * sizeof(double);
+     return bytes;
+   }
+
+   void FixSavePos::grow_arrays(int nmax)
+   {
+     memory->grow(xold, nmax, 3, "FixSavePos:xold");
+   }
+
+   void FixSavePos::copy_arrays(int i, int j, int delflag)
+   {
+     memcpy(xold[j], xold[i], sizeof(double) * 3);
+   }
+
+   void FixSavePos::set_arrays(int i)
+   {
+     memset(xold[i], 0, sizeof(double) * 3);
+   }
+
+   int FixSavePos::pack_exchange(int i, double *buf)
+   {
+     int m = 0;
+     buf[m++] = xold[i][0];
+     buf[m++] = xold[i][1];
+     buf[m++] = xold[i][2];
+
+     return m;
+   }
+
+   int FixSavePos::unpack_exchange(int nlocal, double *buf)
+   {
+     int m = 0;
+     xold[nlocal][0] = buf[m++];
+     xold[nlocal][1] = buf[m++];
+     xold[nlocal][2] = buf[m++];
+
+     return m;
+   }
+
+Now, a little bit about memory allocation. We use the Memory class which
+is just a bunch of template functions for allocating 1D and 2D
+arrays. So you need to include ``memory.h`` to have access to them.
+
+Finally, if you need to write/read some global information used in
+your fix to the restart file, you might do it by setting flag
+``restart_global = 1`` in the constructor and implementing the methods
+``void write_restart(FILE *fp)`` and ``void restart(char *buf)``.
+If, in addition, you want to write the per-atom property to restart
+files additional settings and functions are needed:
+
+- a fix flag indicating this needs to be set ``restart_peratom = 1;``
+- ``atom->add_callback()`` and ``atom->delete_callback()`` must be called
+  a second time with the final argument set to 1 instead of 0 (indicating
+  restart processing instead of per-atom data memory management).
+- the functions ``void pack_restart(int i, double *buf)`` and
+  ``void unpack_restart(int nlocal, int nth)`` need to be implemented
+
--- a/doc/src/pg_developer.rst
+++ b/doc/src/pg_developer.rst
--- a/doc/src/pg_input.rst
+++ b/doc/src/pg_input.rst
@ -0,0 +1,7 @@
+LAMMPS Input Base Class
+************************
+
+.. doxygenclass:: LAMMPS_NS::Input
+      :project: progguide
+      :members:
+
--- a/doc/src/pg_lammps.rst
+++ b/doc/src/pg_lammps.rst
@ -0,0 +1,22 @@
+LAMMPS Class
+************
+
+The LAMMPS class is encapsulating an MD simulation state and thus it is
+the class that needs to be created when starting a new simulation system
+state.  The LAMMPS executable essentially creates one instance of this
+class and passes the command line flags and tells it to process the
+provided input (a file or ``stdin``).  It shuts the class down when
+control is returned to it and then exits.  When using LAMMPS as a
+library from another code it is required to create an instance of this
+class, either directly from C++ with ``new LAMMPS()`` or through one
+of the library interface functions like :cpp:func:`lammps_open` of the
+C-library interface, or the :py:class:`lammps.lammps` class constructor
+of the Python module, or the :f:func:`lammps` constructor of the Fortran
+module.
+
+--------------------
+
+.. doxygenclass:: LAMMPS_NS::LAMMPS
+   :project: progguide
+   :members:
+
--- a/doc/utils/requirements.txt
+++ b/doc/utils/requirements.txt
@ -1,5 +1,6 @@
 Sphinx
 sphinxcontrib-spelling
 git+https://github.com/akohlmey/sphinx-fortran@parallel-read
+sphinx_tabs
 breathe
 Pygments
--- a/doc/utils/sphinx-config/_themes/lammps_theme/static/css/lammps.css
+++ b/doc/utils/sphinx-config/_themes/lammps_theme/static/css/lammps.css
@ -3,3 +3,7 @@
    display: block;
    margin-bottom: 0.809em;
 }
+
+.versionmodified {
+    font-weight: bold;
+}
--- a/doc/utils/sphinx-config/conf.py.in
+++ b/doc/utils/sphinx-config/conf.py.in
@ -47,7 +47,9 @@ extensions = [
    'sphinx.ext.imgmath',
    'sphinx.ext.autodoc',
    'sphinxfortran.fortran_domain',
+    'sphinx_tabs.tabs',
    'table_from_list',
+    'tab_or_note',
    'breathe',
 ]
 # 2017-12-07: commented out, since this package is broken with Sphinx 16.x
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@ -1321,6 +1321,7 @@ initializations
 initio
 InP
 inregion
+instantiation
 Institut
 integrators
 Integrators
@ -1777,6 +1778,7 @@ Mattox
 Mattson
 maxangle
 maxbond
+maxchunk
 maxelt
 maxeval
 maxfiles
@ -2002,6 +2004,7 @@ MxN
 myCompute
 myIndex
 mylammps
+MyPool
 mysocket
 myTemp
 myVec
@ -3492,6 +3495,7 @@ zz
 Zm
 PowerShell
 filesystems
+Zstandard
 Zstd
 zstd
 checksum
--- a/fortran/lammps.f90
+++ b/fortran/lammps.f90
@ -54,12 +54,11 @@ MODULE LIBLAMMPS

  ! interface definitions for calling functions in library.cpp
  INTERFACE
-      FUNCTION lammps_open(argc,argv,comm,handle) &
+      FUNCTION lammps_open(argc,argv,comm) &
          BIND(C, name='lammps_open_fortran')
        IMPORT :: c_ptr, c_int
        INTEGER(c_int), VALUE, INTENT(in)     :: argc, comm
        TYPE(c_ptr), DIMENSION(*), INTENT(in) :: argv
-        TYPE(c_ptr), INTENT(out)              :: handle
        TYPE(c_ptr)                           :: lammps_open
      END FUNCTION lammps_open

@ -161,7 +160,7 @@ CONTAINS
    ENDIF

    IF (PRESENT(comm)) THEN
-        lmp_open%handle = lammps_open(argc,argv,comm,dummy)
+        lmp_open%handle = lammps_open(argc,argv,comm)
    ELSE
        lmp_open%handle = lammps_open_no_mpi(argc,argv,dummy)
    END IF
--- a/lib/compress/Makefile.lammps
+++ b/lib/compress/Makefile.lammps
@ -1,21 +1,24 @@
 # This file contains the settings to build and link LAMMPS with
 # support for data compression libraries.
-# 
+#
 # When you build LAMMPS with the COMPRESS package installed, it will
 # use the 3 settings in this file.  They should be set as follows.
-# 
-# The compress_SYSLIB setting is for linking the compression library.
-# By default, the setting will point to zlib (-lz).
-# 
-# The compress_SYSINC and compress_SYSPATH variables do not typically need
-# to be set, as compression libraries are usually installed as packages
-# in system locations. Otherwise, specify its directory via the
-# compress_SYSPATH variable, e.g. -Ldir or compress_SYSINC variable( -Idir)
+#
+# The compress_SYSLIB setting is for linking the compression libraries.
+# By default, the setting will point to zlib (-lz). For including
+# Zstandard support add -DLAMMPS_ZSTD to compress_SYSINC and also
+# add -lzstd to compress_SYSLIB to link to the library.
+#
+# The compress_SYSINC and compress_SYSPATH variables typically do not
+# need any additional settings, as compression libraries are usually
+# installed as packages in system locations. Otherwise, specify the
+# library directory via the compress_SYSPATH variable (e.g. -Ldir) or
+# the include directory via the compress_SYSINC variable (e.g. -Idir).

 # -----------------------------------------------------------

 # Settings that the LAMMPS build will import when this package is installed

-compress_SYSINC =
-compress_SYSLIB = -lz
+compress_SYSINC = # -DLAMMPS_ZSTD
+compress_SYSLIB = -lz # -lzstd
 compress_SYSPATH =
--- a/src/Purge.list
+++ b/src/Purge.list
@ -49,6 +49,8 @@ packages_ntopo.h
 # other auto-generated files
 lmpinstalledpkgs.h
 lmpgitversion.h
+# removed on 9 Sep 2020
+mergesort.h
 # renamed on 8 May 2020
 fix_meso.cpp
 fix_meso.h
--- a/src/REPLICA/fix_hyper_local.cpp
+++ b/src/REPLICA/fix_hyper_local.cpp
@ -164,7 +164,7 @@ FixHyperLocal::FixHyperLocal(LAMMPS *lmp, int narg, char **arg) :
  maxbondperatom = FCCBONDS;
  numcoeff = NULL;
  clist = NULL;
-  cpage = new MyPage<OneCoeff>;
+  cpage = new MyPage<HyperOneCoeff>;
  cpage->init(maxbondperatom,1024*maxbondperatom,1);

  // set comm sizes needed by this fix
@ -976,7 +976,7 @@ void FixHyperLocal::build_bond_list(int natom)
    memory->sfree(clist);
    maxcoeff = atom->nmax;
    memory->create(numcoeff,maxcoeff,"hyper/local:numcoeff");
-    clist = (OneCoeff **) memory->smalloc(maxcoeff*sizeof(OneCoeff *),
+    clist = (HyperOneCoeff **) memory->smalloc(maxcoeff*sizeof(HyperOneCoeff *),
                                         "hyper/local:clist");
  }

@ -1741,7 +1741,7 @@ double FixHyperLocal::memory_usage()
  bytes += 2*maxall * sizeof(double);             // maxstrain,maxstrain_domain
  if (checkbias) bytes += maxall * sizeof(tagint);  // biasflag
  bytes += maxcoeff * sizeof(int);                // numcoeff
-  bytes += maxcoeff * sizeof(OneCoeff *);         // clist
-  bytes += maxlocal*maxbondperatom * sizeof(OneCoeff);  // cpage estimate
+  bytes += maxcoeff * sizeof(HyperOneCoeff *);         // clist
+  bytes += maxlocal*maxbondperatom * sizeof(HyperOneCoeff);  // cpage estimate
  return bytes;
 }
--- a/src/REPLICA/fix_hyper_local.h
+++ b/src/REPLICA/fix_hyper_local.h
@ -23,6 +23,8 @@ FixStyle(hyper/local,FixHyperLocal)
 #include "fix_hyper.h"

 namespace LAMMPS_NS {
+  // forward declaration. struct HyperOneCoeff is defined in my_page.h
+  struct HyperOneCoeff;

 class FixHyperLocal : public FixHyper {
 public:
@ -183,13 +185,8 @@ class FixHyperLocal : public FixHyper {

  // data structs for persisting bias coeffs when bond list is reformed

-  struct OneCoeff {
-    double biascoeff;
-    tagint tag;
-  };
-
-  MyPage<OneCoeff> *cpage;     // pages of OneCoeff datums for clist
-  OneCoeff **clist;            // ptrs to vectors of bias coeffs for each atom
+  MyPage<HyperOneCoeff> *cpage;// pages of HyperOneCoeff datums for clist
+  HyperOneCoeff **clist;       // ptrs to vectors of bias coeffs for each atom
  int *numcoeff;               // # of bias coeffs per atom (one per bond)
  int maxcoeff;                // allocate sized of clist and numcoeff

--- a/src/atom.cpp
+++ b/src/atom.cpp
@ -47,6 +47,33 @@ using namespace MathConst;

 /* ---------------------------------------------------------------------- */

+/** \class LAMMPS_NS::Atom
+ *  \brief Class to provide access to atom data
+
+\verbatim embed:rst
+The Atom class provides access to atom style related global settings and
+per-atom data that is stored with atoms and migrates with them from
+sub-domain to sub-domain as atoms move around.  This includes topology
+data, which is stored with either one specific atom or all atoms involved
+depending on the settings of the :doc:`newton command <newton>`.
+
+The actual per-atom data is allocated and managed by one of the various
+classes derived from the AtomVec class as determined by
+the :doc:`atom_style command <atom_style>`.  The pointers in the Atom class
+are updated by the AtomVec class as needed.
+\endverbatim
+ */
+
+/** Atom class constructor
+ *
+ * This resets and initializes all kinds of settings,
+ * parameters, and pointer variables for per-atom arrays.
+ * This also initializes the factory for creating
+ * instances of classes derived from the AtomVec base
+ * class, which correspond to the selected atom style.
+ *
+ * \param  lmp  pointer to the base LAMMPS class */
+
 Atom::Atom(LAMMPS *lmp) : Pointers(lmp)
 {
  natoms = 0;
@ -688,7 +715,6 @@ AtomVec *Atom::avec_creator(LAMMPS *lmp)
  return new T(lmp);
 }

-
 /* ---------------------------------------------------------------------- */

 void Atom::init()
@ -2301,12 +2327,17 @@ int Atom::find_custom(const char *name, int &flag)
  return -1;
 }

-/* ----------------------------------------------------------------------
-   add a custom variable with name of type flag = 0/1 for int/double
-   assumes name does not already exist
-   return index in ivector or dvector of its location
------------------------------------------------------------------------- */
+/** \brief Add a custom per-atom property with the given name and type
+\verbatim embed:rst

+This function will add a custom per-atom property with the name "name"
+as either a list of int or double to the list of custom properties.  This
+function is called, e.g. from :doc:`fix property/atom <fix_property_atom>`.
+\endverbatim
+ * \param name Name of the property (w/o a "d_" or "i_" prefix)
+ * \param flag Data type of property: 0 for int, 1 for double
+ * \return Index of property in the respective list of properties
+ */
 int Atom::add_custom(const char *name, int flag)
 {
  int index;
@ -2338,12 +2369,19 @@ int Atom::add_custom(const char *name, int flag)
  return index;
 }

-/* ----------------------------------------------------------------------
-   remove a custom variable of type flag = 0/1 for int/double at index
-   free memory for vector and name and set ptrs to NULL
-   ivector/dvector and iname/dname lists never shrink
------------------------------------------------------------------------- */
-
+/*! \brief Remove a custom per-atom property of a given type
+ *
+\verbatim embed:rst
+This will remove a property that was requested, e.g. by the
+:doc:`fix property/atom <fix_property_atom>` command.  It frees the
+allocated memory and sets the pointer to ``NULL`` so that the entry in
+the list can be reused.  The lists of those pointers will never be
+compacted or shrunk, so that index to name mappings remain valid.
+\endverbatim
+ *
+ * \param flag whether the property is integer (=0) or double (=1)
+ * \param index index of that property in the respective list.
+ */
 void Atom::remove_custom(int flag, int index)
 {
  if (flag == 0) {
@ -2359,16 +2397,123 @@ void Atom::remove_custom(int flag, int index)
  }
 }

-/* ----------------------------------------------------------------------
-   return a pointer to a named internal variable
-   if don't recognize name, return NULL
------------------------------------------------------------------------- */
+/** Provide access to internal data of the Atom class by keyword
+ *
+\verbatim embed:rst
+
+This function is a way to access internal per-atom data.  This data is
+distributed across MPI ranks and thus only the data for "local" atoms
+are expected to be available.  Whether also data for "ghost" atoms is
+stored and up-to-date depends on various simulation settings.
+
+This table lists a large part of the supported names, their data types,
+length of the data area, and a short description.
+
+.. list-table::
+   :header-rows: 1
+   :widths: auto
+
+   * - Name
+     - Type
+     - Items per atom
+     - Description
+   * - mass
+     - double
+     - 1
+     - per-type mass. This array is **NOT** a per-atom array
+       but of length ``ntypes+1``, element 0 is ignored.
+   * - id
+     - tagint
+     - 1
+     - atom ID of the particles
+   * - type
+     - int
+     - 1
+     - atom type of the particles
+   * - mask
+     - int
+     - 1
+     - bitmask for mapping to groups. Individual bits are set
+       to 0 or 1 for each group.
+   * - image
+     - imageint
+     - 1
+     - 3 image flags encoded into a single integer.
+       See :cpp:func:`lammps_encode_image_flags`.
+   * - x
+     - double
+     - 3
+     - x-, y-, and z-coordinate of the particles
+   * - v
+     - double
+     - 3
+     - x-, y-, and z-component of the velocity of the particles
+   * - f
+     - double
+     - 3
+     - x-, y-, and z-component of the force on the particles
+   * - molecule
+     - tagint
+     - 1
+     - molecule ID of the particles
+   * - q
+     - double
+     - 1
+     - charge of the particles
+   * - mu
+     - double
+     - 3
+     - dipole moment of the particles
+   * - omega
+     - double
+     - 3
+     - x-, y-, and z-component of rotational velocity of the particles
+   * - angmom
+     - double
+     - 3
+     - x-, y-, and z-component of angular momentum of the particles
+   * - torque
+     - double
+     - 3
+     - x-, y-, and z-component of the torque on the particles
+   * - radius
+     - double
+     - 1
+     - radius of the (extended) particles
+   * - rmass
+     - double
+     - 1
+     - per-atom mass of the particles. ``NULL`` if per-type masses are
+       used. See the :cpp:func:`rmass_flag<lammps_extract_setting>` setting.
+   * - ellipsoid
+     - int
+     - 1
+     - 1 if the particle is an ellipsoidal particle, 0 if not
+   * - line
+     - int
+     - 1
+     - 1 if the particle is a line particle, 0 if not
+   * - tri
+     - int
+     - 1
+     - 1 if the particle is a triangulated particle, 0 if not
+   * - body
+     - int
+     - 1
+     - 1 if the particle is a body particle, 0 if not
+
+\endverbatim
+ *
+ * \param  name  string with the keyword of the desired property.
+                 Typically the name of the pointer variable returned
+ * \return       pointer to the requested data cast to ``void *`` or NULL */

 void *Atom::extract(char *name)
 {
  // --------------------------------------------------------------------
  // 4th customization section: customize by adding new variable name

+  /* NOTE: this array is only of length ntypes+1 */
  if (strcmp(name,"mass") == 0) return (void *) mass;

  if (strcmp(name,"id") == 0) return (void *) tag;
@ -2389,6 +2534,7 @@ void *Atom::extract(char *name)
  if (strcmp(name,"ellipsoid") == 0) return (void *) ellipsoid;
  if (strcmp(name,"line") == 0) return (void *) line;
  if (strcmp(name,"tri") == 0) return (void *) tri;
+  if (strcmp(name,"body") == 0) return (void *) body;

  if (strcmp(name,"vfrac") == 0) return (void *) vfrac;
  if (strcmp(name,"s0") == 0) return (void *) s0;
--- a/src/atom_vec_body.cpp
+++ b/src/atom_vec_body.cpp
@ -555,7 +555,7 @@ bigint AtomVecBody::memory_usage_bonus()
 {
  bigint bytes = 0;
  bytes += nmax_bonus*sizeof(Bonus);
-  bytes += icp->size + dcp->size;
+  bytes += icp->size() + dcp->size();

  int nall = nlocal_bonus + nghost_bonus;
  for (int i = 0; i < nall; i++) {
--- a/src/dump.cpp
+++ b/src/dump.cpp
@ -32,8 +32,6 @@ using namespace LAMMPS_NS;
 #if defined(LMP_QSORT)
 // allocate space for static class variable
 Dump *Dump::dumpptr;
-#else
-#include "mergesort.h"
 #endif

 #define BIG 1.0e20
@ -766,9 +764,9 @@ void Dump::sort()
 #else
  if (!reorderflag) {
    for (i = 0; i < nme; i++) index[i] = i;
-    if (sortcol == 0) merge_sort(index,nme,(void *)this,idcompare);
-    else if (sortorder == ASCEND) merge_sort(index,nme,(void *)this,bufcompare);
-    else merge_sort(index,nme,(void *)this,bufcompare_reverse);
+    if (sortcol == 0) utils::merge_sort(index,nme,(void *)this,idcompare);
+    else if (sortorder == ASCEND) utils::merge_sort(index,nme,(void *)this,bufcompare);
+    else utils::merge_sort(index,nme,(void *)this,bufcompare_reverse);
  }
 #endif

--- a/src/input.cpp
+++ b/src/input.cpp
@ -60,6 +60,37 @@ using namespace LAMMPS_NS;

 /* ---------------------------------------------------------------------- */

+/** \class LAMMPS_NS::Input
+ *  \brief Class for processing commands and input files
+ *
+\verbatim embed:rst
+
+The Input class contains methods for reading, pre-processing, and
+parsing LAMMPS commands and input files.  It either dispatches commands
+to the respective class instances or executes them directly with its
+own code.  It also contains the instance of the Variable class, which
+performs computations and text substitutions.
+
+\endverbatim */
+
+/** Input class constructor
+ *
+\verbatim embed:rst
+
+This sets up the input processing, processes the *-var* and *-echo*
+command line flags, holds the factory of commands and creates and
+initializes an instance of the Variable class.
+
+To execute a command, a specific class instance, derived from
+:cpp:class:`Pointers`, is created, then its ``command()`` member
+function is executed, and finally the class instance is deleted.
+
+\endverbatim
+ *
+ * \param  lmp   pointer to the base LAMMPS class
+ * \param  argc  number of entries in *argv*
+ * \param  argv  argument vector  */
+
 Input::Input(LAMMPS *lmp, int argc, char **argv) : Pointers(lmp)
 {
  MPI_Comm_rank(world,&me);
@ -137,10 +168,15 @@ Input::~Input()
  delete command_map;
 }

-/* ----------------------------------------------------------------------
-   process all input from infile
-   infile = stdin or file if command-line arg "-in" was used
------------------------------------------------------------------------- */
+/** Process all input from the ``FILE *`` pointer *infile*
+ *
+\verbatim embed:rst
+
+This will read lines from *infile*, parse and execute them until the end
+of the file is reached.  The *infile* pointer will usually point to
+``stdin`` or the input file given with the ``-in`` command line flag.
+
+\endverbatim */

 void Input::file()
 {
@ -229,10 +265,21 @@ void Input::file()
  }
 }

-/* ----------------------------------------------------------------------
-   process all input from file at filename
-   mostly called from library interface
------------------------------------------------------------------------- */
+/** Process all input from the file *filename*
+ *
+\verbatim embed:rst
+
+This function opens the file at the path *filename*, puts the current
+file pointer stored in *infile* on a stack, and assigns the newly
+opened file pointer to *infile* instead.  Then it will call the
+:cpp:func:`Input::file() <LAMMPS_NS::Input::file()>` function to read,
+parse and execute the contents of that file.  When the end of the file
+is reached, it is closed and the previous file pointer from the infile
+file pointer stack is restored to *infile*.
+
+\endverbatim
+ *
+ * \param  filename  name of file with LAMMPS commands */

 void Input::file(const char *filename)
 {
@ -263,11 +310,19 @@ void Input::file(const char *filename)
  }
 }

-/* ----------------------------------------------------------------------
-   invoke one command in single
-   first copy to line, then parse, then execute it
-   return command name to caller
------------------------------------------------------------------------- */
+/** Process a single command from a string in *single*
+ *
+\verbatim embed:rst
+
+This function takes the text in *single*, makes a copy, parses that,
+executes the command and returns the name of the command (without the
+arguments).  If there was no command in *single* it will return
+``NULL``.
+
+\endverbatim
+ *
+ * \param  single  string with LAMMPS command
+ * \return         string with name of the parsed command w/o arguments */

 char *Input::one(const std::string &single)
 {
--- a/src/irregular.cpp
+++ b/src/irregular.cpp
@ -31,7 +31,6 @@ using namespace LAMMPS_NS;
 int *Irregular::proc_recv_copy;
 static int compare_standalone(const void *, const void *);
 #else
-#include "mergesort.h"
 // prototype for non-class function
 static int compare_standalone(const int, const int, void *);
 #endif
@ -441,7 +440,7 @@ int Irregular::create_atom(int n, int *sizes, int *proclist, int sortflag)
    proc_recv_copy = proc_recv;
    qsort(order,nrecv_proc,sizeof(int),compare_standalone);
 #else
-    merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
+    utils::merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
 #endif

    int j;
@ -715,7 +714,7 @@ int Irregular::create_data(int n, int *proclist, int sortflag)
    proc_recv_copy = proc_recv;
    qsort(order,nrecv_proc,sizeof(int),compare_standalone);
 #else
-    merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
+    utils::merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
 #endif

    int j;
@ -889,7 +888,7 @@ int Irregular::create_data_grouped(int n, int *procs, int sortflag)
    proc_recv_copy = proc_recv;
    qsort(order,nrecv_proc,sizeof(int),compare_standalone);
 #else
-    merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
+    utils::merge_sort(order,nrecv_proc,(void *)proc_recv,compare_standalone);
 #endif

    int j;
--- a/src/lammps.cpp
+++ b/src/lammps.cpp
@ -80,14 +80,30 @@ struct LAMMPS_NS::package_styles_lists {

 using namespace LAMMPS_NS;

-/* ----------------------------------------------------------------------
-   start up LAMMPS
-   allocate fundamental classes (memory, error, universe, input)
-   parse input switches
-   initialize communicators, screen & logfile output
-   input is allocated at end after MPI info is setup
------------------------------------------------------------------------- */
+/** \class LAMMPS_NS::LAMMPS
+ * \brief LAMMPS simulation instance
+ *
+ * The LAMMPS class contains pointers to all constituent class instances
+ * and global variables that are used by a LAMMPS simulation. Its contents
+ * represent the entire state of the simulation.
+ *
+ * The LAMMPS class manages the components of an MD simulation by creating,
+ * deleting, and initializing instances of the classes it is composed of,
+ * processing command line flags, and providing access to some global properties.
+ * The specifics of setting up and running a simulation are handled by the
+ * individual component class instances. */

+/** Create a LAMMPS simulation instance
+ *
+ * The LAMMPS constructor starts up a simulation by allocating all
+ * fundamental classes in the necessary order, parses input switches
+ * and their arguments, initializes communicators, screen and logfile
+ * output FILE pointers.
+ *
+ * \param narg number of arguments
+ * \param arg list of arguments
+ * \param communicator MPI communicator used by this LAMMPS instance
+ */
 LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) :
  memory(NULL), error(NULL), universe(NULL), input(NULL), atom(NULL),
  update(NULL), neighbor(NULL), comm(NULL), domain(NULL), force(NULL),
@ -636,14 +652,13 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator) :
  }
 }

-/* ----------------------------------------------------------------------
-   shutdown LAMMPS
-   delete top-level classes
-   close screen and log files in world and universe
-   output files were already closed in destroy()
-   delete fundamental classes
------------------------------------------------------------------------- */
-
+/** Shut down a LAMMPS simulation instance
+ *
+ * The LAMMPS destructor shuts down the simulation by deleting top-level class
+ * instances, closing screen and log files for the global instance (aka "world")
+ * and files and MPI communicators in sub-partitions ("universes"). Then it
+ * deletes the fundamental class instances and copies of data inside the class.
+ */
 LAMMPS::~LAMMPS()
 {
  const int me = comm->me;
@ -989,6 +1004,11 @@ void _noopt LAMMPS::init_pkg_lists()
 #undef REGION_CLASS
 }

+/** Return true if a LAMMPS package is enabled in this binary
+ *
+ * \param pkg name of package
+ * \return true if yes, else false
+ */
 bool LAMMPS::is_installed_pkg(const char *pkg)
 {
  for (int i=0; installed_packages[i] != NULL; ++i)
@ -1005,6 +1025,16 @@ bool LAMMPS::is_installed_pkg(const char *pkg)
    }                                                                   \
  }

+/** \brief Return name of package that a specific style belongs to
+ *
+ * This function checks the given name against all lists of styles
+ * for all types of styles and, if the name and the style match, it
+ * returns which package this style belongs to.
+ *
+ * \param style Type of style (e.g. atom, pair, fix, etc.)
+ * \param name Name of style
+ * \return Name of the package this style is part of
+ */
 const char *LAMMPS::match_style(const char *style, const char *name)
 {
  check_for_match(angle,style,name);
--- a/src/lammps.h
+++ b/src/lammps.h
@ -85,8 +85,10 @@ class LAMMPS {
  struct package_styles_lists *pkg_lists;
  void init_pkg_lists();
  void help();
-  LAMMPS() {};                   // prohibit using the default constructor
-  LAMMPS(const LAMMPS &) {};     // prohibit using the copy constructor
+  /// Default constructor. Declared private to prohibit its use
+  LAMMPS() {};
+  /// Copy constructor. Declared private to prohibit its use
+  LAMMPS(const LAMMPS &) {};
 };

 }
--- a/src/library.cpp
+++ b/src/library.cpp
@ -108,18 +108,23 @@ thus is otherwise ignored.  However ``argc`` may be set to 0 and then
 ``argv`` may be ``NULL``.  If MPI is not yet initialized, ``MPI_Init()``
 will be called during creation of the LAMMPS class instance.

-The function returns a pointer to the created LAMMPS class. If for some
-reason the initialization of the LAMMPS instance fails, the function
-returns ``NULL``.  For backward compatibility it is also possible to
-provide the address of a pointer variable as argument *ptr*\ . This
-argument may be ``NULL`` and is then ignored.
+If for some reason the creation or initialization of the LAMMPS instance
+fails, a null pointer is returned.
+
+.. versionchanged:: 15Sep2020
+
+   This function now has the pointer to the created LAMMPS class
+   instance as return value.  For backward compatibility it is still
+   possible to provide the address of a pointer variable as final
+   argument *ptr*\ .  This use is deprecated and may be removed in
+   the future.  The *ptr* argument may be ``NULL`` and is then ignored.

 .. note::

   This function is not declared when the code linking to the LAMMPS
   library interface is compiled with ``-DLAMMPS_LIB_NO_MPI``, or
   contains a ``#define LAMMPS_LIB_NO_MPI 1`` statement before
-   ``#include "library.h"``.  In that case, you need to use the
+   ``#include "library.h"``.  In that case, you must use the
   :cpp:func:`lammps_open_no_mpi` function.

 \endverbatim
@ -169,6 +174,17 @@ library was compiled in serial mode, but the calling code runs in
 parallel and the ``MPI_Comm`` data type of the STUBS library would not
 be compatible with that of the calling code.

+If for some reason the creation or initialization of the LAMMPS instance
+fails, a null pointer is returned.
+
+.. versionchanged:: 15Sep2020
+
+   This function now has the pointer to the created LAMMPS class
+   instance as return value.  For backward compatibility it is still
+   possible to provide the address of a pointer variable as final
+   argument *ptr*\ .  This use is deprecated and may be removed in
+   the future.  The *ptr* argument may be ``NULL`` and is then ignored.
+
 \endverbatim
 *
 * \param  argc  number of command line arguments
@ -195,20 +211,23 @@ module.  Internally it converts the *f_comm* argument into a C-style MPI
 communicator with ``MPI_Comm_f2c()`` and then calls
 :cpp:func:`lammps_open`.

+If for some reason the creation or initialization of the LAMMPS instance
+fails, a null pointer is returned.
+
+.. versionadded:: 15Sep2020
+
 \endverbatim
 *
 * \param  argc   number of command line arguments
 * \param  argv   list of command line argument strings
 * \param  f_comm Fortran style MPI communicator for this LAMMPS instance
- * \param  ptr    pointer to a void pointer variable
- *                which serves as a handle; may be ``NULL``
 * \return        pointer to new LAMMPS instance cast to ``void *`` */

-void *lammps_open_fortran(int argc, char **argv, int f_comm, void **ptr)
+void *lammps_open_fortran(int argc, char **argv, int f_comm)
 {
  lammps_mpi_init();
  MPI_Comm c_comm = MPI_Comm_f2c((MPI_Fint)f_comm);
-  return lammps_open(argc, argv, c_comm, ptr);
+  return lammps_open(argc, argv, c_comm, nullptr);
 }

 /* ---------------------------------------------------------------------- */
@ -244,6 +263,8 @@ The MPI standard requires that any MPI application must call
 calls.  This function checks, whether MPI is already initialized and
 calls ``MPI_Init()`` in case it is not.

+.. versionadded:: 15Sep2020
+
 \endverbatim */

 void lammps_mpi_init()
@ -274,6 +295,8 @@ before exiting the program to wait until all (parallel) tasks are
 completed and then MPI is cleanly shut down.  After this function no
 more MPI calls may be made.

+.. versionadded:: 15Sep2020
+
 \endverbatim */

 void lammps_mpi_finalize()
--- a/src/library.h
+++ b/src/library.h
@ -77,7 +77,7 @@ extern "C" {
 void *lammps_open(int argc, char **argv, MPI_Comm comm, void **ptr);
 #endif
 void *lammps_open_no_mpi(int argc, char **argv, void **ptr);
-void *lammps_open_fortran(int argc, char **argv, int f_comm, void **ptr);
+void *lammps_open_fortran(int argc, char **argv, int f_comm);
 void  lammps_close(void *handle);
 void  lammps_mpi_init();
 void  lammps_mpi_finalize();
--- a/src/mergesort.h
+++ b/src/mergesort.h
@ -1,124 +0,0 @@
-/* -*- c++ -*- ----------------------------------------------------------
-   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
-   http://lammps.sandia.gov, Sandia National Laboratories
-   Steve Plimpton, sjplimp@sandia.gov
-
-   Copyright (2003) Sandia Corporation.  Under the terms of Contract
-   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
-   certain rights in this software.  This software is distributed under
-   the GNU General Public License.
-
-   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
-
-#ifndef LMP_MERGESORT
-#define LMP_MERGESORT
-
-#include <cstring>
-
-// custom hybrid upward merge sort implementation with support to pass
-// an opaque pointer to the comparison function, e.g. for access to
-// class members. this avoids having to use global variables.
-// for improved performance, we employ an in-place insertion sort on
-// chunks of up to 64 elements and switch to merge sort from then on.
-
-// part 1. insertion sort for pre-sorting of small chunks
-
-static void insertion_sort(int *index, int num, void *ptr,
-                           int (*comp)(int, int, void*))
-{
-  if (num < 2) return;
-  for (int i=1; i < num; ++i) {
-    int tmp = index[i];
-    for (int j=i-1; j >= 0; --j) {
-      if ((*comp)(index[j],tmp,ptr) > 0) {
-        index[j+1] = index[j];
-      } else {
-        index[j+1] = tmp;
-        break;
-      }
-      if (j == 0) index[0] = tmp;
-    }
-  }
-}
-
-// part 2. merge two sublists
-
-static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi,
-                     void *ptr, int (*comp)(int, int, void *))
-{
-  int i = llo;
-  int l = llo;
-  int r = rlo;
-  while ((l < lhi) && (r < rhi)) {
-    if ((*comp)(buf[l],buf[r],ptr) < 0)
-      idx[i++] = buf[l++];
-    else idx[i++] = buf[r++];
-  }
-
-  while (l < lhi) idx[i++] = buf[l++];
-  while (r < rhi) idx[i++] = buf[r++];
-}
-
-// part 3: loop over sublists doubling in size with each iteration.
-//         pre-sort sublists with insertion sort for better performance.
-
-static void merge_sort(int *index, int num, void *ptr,
-                       int (*comp)(int, int, void *))
-{
-  if (num < 2) return;
-
-  int chunk,i,j;
-
-  // do insertion sort on chunks of up to 64 elements
-
-  chunk = 64;
-  for (i=0; i < num; i += chunk) {
-    j = (i+chunk > num) ? num-i : chunk;
-    insertion_sort(index+i,j,ptr,comp);
-  }
-
-  // already done?
-
-  if (chunk >= num) return;
-
-  // continue with merge sort on the pre-sorted chunks.
-  // we need an extra buffer for temporary storage and two
-  // pointers to operate on, so we can swap the pointers
-  // rather than copying to the hold buffer in each pass
-
-  int *buf = new int[num];
-  int *dest = index;
-  int *hold = buf;
-
-  while (chunk < num) {
-    int m;
-
-    // swap hold and destination buffer
-
-    int *tmp = dest; dest = hold; hold = tmp;
-
-    // merge from hold array to destination array
-
-    for (i=0; i < num-1; i += 2*chunk) {
-      j = i + 2*chunk;
-      if (j > num) j=num;
-      m = i+chunk;
-      if (m > num) m=num;
-      do_merge(dest,hold,i,m,m,j,ptr,comp);
-    }
-
-    // copy all indices not handled by the chunked merge sort loop
-
-    for ( ; i < num ; i++ ) dest[i] = hold[i];
-    chunk *= 2;
-  }
-
-  // if the final sorted data is in buf, copy back to index
-
-  if (dest == buf) memcpy(index,buf,sizeof(int)*num);
-
-  delete[] buf;
-}
-
-#endif
--- a/src/my_page.cpp
+++ b/src/my_page.cpp
@ -0,0 +1,193 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "my_page.h"
+
+#include <cstdlib>
+
+#if defined(LMP_USER_INTEL) && !defined(LAMMPS_MEMALIGN) && !defined(_WIN32)
+#define LAMMPS_MEMALIGN 64
+#endif
+
+using namespace LAMMPS_NS;
+
+/** \class LAMMPS_NS::MyPage
+ * \brief Templated class for storing chunks of datums in pages.
+ *
+ * The size of the chunk may vary from call to call, but must be
+ * less than or equal to the *maxchunk* setting.
+ * The chunks are not returnable like with malloc() (i.e. you cannot
+ * call free() on them individually).  One can only reset and start over.
+ * The purpose of this class is to replace many small memory allocations
+ * via malloc() with a few large ones.  Since the pages are never freed
+ * until the class is re-initialized, they can be re-used without having
+ * to re-allocate them by calling the reset() method.
+ *
+ * The settings *maxchunk*, *pagesize*, and *pagedelta* control
+ * the memory allocation strategy.  The *maxchunk* value represents
+ * the expected largest number of items per chunk.  If there is
+ * less space left on the current page, a new page is allocated
+ * for the next chunk.  The *pagesize* value represents how many
+ * items can fit on a single page.  It should have space for multiple
+ * chunks of size *maxchunk*.  The combination of these two
+ * parameters determines how much memory is wasted by either switching
+ * to the next page too soon or allocating too large pages that never
+ * get properly used.  It is an error, if a requested chunk is larger
+ * than *maxchunk*.  The *pagedelta* parameter determines how many
+ * pages are allocated in one go.  In combination with the *pagesize*
+ * setting, this determines how often blocks of memory get allocated
+ * (fewer allocations will result in faster execution).
+ *
+ * \note
+ * This is a template class with explicit instantiation. If the class
+ * is used with a new data type a new explicit instantiation may need to
+ * be added at the end of the file ``src/my_page.cpp`` to avoid symbol
+ * lookup errors. */
+
+/** Create an empty class instance that does not own any storage yet.
+ *
+ *  init() must be called before use to define the allocation settings. */
+
+template <class T>
+MyPage<T>::MyPage() : ndatum(0), nchunk(0), pages(nullptr), page(nullptr),
+                      npage(0), ipage(-1), index(-1), maxchunk(-1),
+                      pagesize(-1), pagedelta(1), errorflag(0) {}
+
+template <class T>
+MyPage<T>::~MyPage() {
+  deallocate();   // frees every page and the table of page pointers
+}
+
+/** (Re-)initialize the set of pages and allocation parameters.
+ *
+ * This also frees all previously allocated storage and allocates
+ * the first page(s).
+ *
+ * \param  user_maxchunk   Expected maximum number of items for one chunk
+ * \param  user_pagesize   Number of items on a single memory page
+ * \param  user_pagedelta  Number of pages to allocate with one malloc
+ * \return                 1 if there were invalid parameters, 2 if there was an allocation error or 0 if successful */
+
+template<class T>
+int MyPage<T>::init(int user_maxchunk, int user_pagesize,
+                    int user_pagedelta) {
+  // validate first: rejected settings must never be stored, otherwise they
+  // would be combined with pages that were sized for the previous settings
+  if (user_maxchunk <= 0 || user_pagesize <= 0 || user_pagedelta <= 0) return 1;
+  if (user_maxchunk > user_pagesize) return 1;
+
+  // free storage if re-initialized
+  deallocate();
+
+  maxchunk = user_maxchunk;
+  pagesize = user_pagesize;
+  pagedelta = user_pagedelta;
+
+  // initial page allocation; allocate() reports failure through errorflag
+  allocate();
+  if (errorflag) return 2;
+  reset();
+  return 0;
+}
+
+/** Pointer to location that can store N items.
+ *
+ * This will allocate more pages as needed.
+ * If the parameter *n* is larger than the *maxchunk*
+ * setting, an error is flagged.
+ *
+ * \param  n  number of items for which storage is requested
+ * \return    memory location or null pointer, if error or allocation failed */
+
+template <class T>
+T *MyPage<T>::get(int n) {
+  // a request larger than maxchunk is flagged as an error and refused
+  if (n > maxchunk) {
+    errorflag = 1;
+    return nullptr;
+  }
+  // the counters are updated before the storage is located
+  ndatum += n;
+  ++nchunk;
+
+  // not enough room left on the current page: advance to the next one
+  // and grow the pool first if every allocated page is already in use
+  if (index+n > pagesize) {
+    ++ipage;
+    if (ipage == npage) {
+      allocate();
+      if (errorflag) return nullptr;
+    }
+    page = pages[ipage];
+    index = 0;
+  }
+  T *chunk = page + index;
+  index += n;
+  return chunk;
+}
+
+
+/** Reset state of memory pool without freeing any memory */
+
+template <class T>
+void MyPage<T>::reset() {
+  // rewind to the start of the first page; no memory is released
+  ipage = index = 0;
+  nchunk = ndatum = errorflag = 0;
+  page = pages ? pages[0] : nullptr;
+}
+
+/** Add *pagedelta* pages to the pool; errorflag is set to 2 on failure */
+
+template <class T>
+void MyPage<T>::allocate() {
+  // grow the page table through a temporary: assigning the result of
+  // realloc() directly would lose (and leak) the old table on failure
+  T **newpages = (T **) realloc(pages,(npage+pagedelta)*sizeof(T *));
+  if (!newpages) { errorflag = 2; return; }
+  pages = newpages;
+  npage += pagedelta;
+
+  for (int i = npage-pagedelta; i < npage; i++) {
+#if defined(LAMMPS_MEMALIGN)
+    void *ptr = nullptr;  // posix_memalign() may leave ptr unset on failure
+    if (posix_memalign(&ptr, LAMMPS_MEMALIGN, pagesize*sizeof(T)))
+      errorflag = 2;
+    pages[i] = (T *) ptr;
+#else
+    pages[i] = (T *) malloc(pagesize*sizeof(T));
+    if (!pages[i]) errorflag = 2;
+#endif
+  }
+}
+
+/** Free all allocated pages of this class instance */
+
+template <class T>
+void MyPage<T>::deallocate() {
+  if (pages)   // guard: never index a null page table
+    for (int i = 0; i < npage; i++) free(pages[i]);
+  free(pages);
+  pages = nullptr; npage = 0;
+  reset();     // last, so that page becomes null instead of dangling
+}
+
+// explicit instantiations: one line is needed for every type T used with MyPage
+
+namespace LAMMPS_NS {
+  template class MyPage<int>;
+  template class MyPage<long>;
+  template class MyPage<long long>;
+  template class MyPage<double>;
+  template class MyPage<HyperOneCoeff>;
+}
--- a/src/my_page.h
+++ b/src/my_page.h
@ -12,144 +12,41 @@
 ------------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------
-MyPage = templated class for storing chunks of datums in pages
-  chunks are not returnable, can only reset and start over
-  replaces many small mallocs with a few large mallocs
-  pages are never freed, so can reuse w/out reallocs
-usage:
-  request one datum at a time, repeat, clear
-  request chunks of datums in each get() or vget(), repeat, clear
-  chunk size can vary from request to request
-  chunk size can be known in advance or registered after usage via vgot()
-inputs:
-   template T = one datum, e.g. int, double, struct, int[3]
-     for int[3], access datum as ivec[i][2]
-methods:
-   T *get() = return ptr to one datum
-   T *get(N) = return ptr to N datums, N < maxchunk required
-   T *vget() = return ptr to maxchunk datums, use as needed, then call vgot()
-     all gets return NULL if error encountered
-   vgot(N) = used N datums of previous vget(), N < maxchunk required
-   void init(maxchunk, pagesize, pagedelta)
-     define allocation params and allocate first page(s)
-     call right after constructor
-       can call again to reset allocation params and free previous pages
-     maxchunk = max # of datums in one chunk, default = 1
-     pagesize = # of datums in one page, default = 1024
-       should be big enough to store multiple chunks
-     pagedelta = # of pages to allocate at a time, default = 1
-     return 1 if bad params
-   void reset() = clear pages w/out freeing
-   int size() = return total size of allocated pages in bytes
-   int status() = return error status
-     0 = ok, 1 = chunksize > maxchunk, 2 = allocation error
+   templated class for storing chunks of datums in pages
 ------------------------------------------------------------------------- */

 #ifndef LAMMPS_MY_PAGE_H
 #define LAMMPS_MY_PAGE_H

-#if defined(LMP_USER_INTEL) && !defined(LAMMPS_MEMALIGN) && !defined(_WIN32)
-#define LAMMPS_MEMALIGN 64
-#endif
+#include "lmptype.h"

-#include <cstdlib>
 namespace LAMMPS_NS {

+struct HyperOneCoeff {
+  double biascoeff;
+  tagint tag;
+};
+
 template<class T>
 class MyPage {
 public:
  int ndatum;      // total # of stored datums
  int nchunk;      // total # of stored chunks
+  MyPage();
+  virtual ~MyPage();

-  MyPage() {
-    ndatum = nchunk = 0;
-    pages = NULL;
-    npage = 0;
-    errorflag = 0;
-  }
+  int init(int user_maxchunk=1, int user_pagesize=1024,
+           int user_pagedelta=1);

-  // (re)initialize allocation params
-  // also allocate first page(s)
+  T *get(int n=1);

-  int init(int user_maxchunk = 1, int user_pagesize = 1024,
-           int user_pagedelta = 1) {
-    maxchunk = user_maxchunk;
-    pagesize = user_pagesize;
-    pagedelta = user_pagedelta;
-
-    if (maxchunk <= 0 || pagesize <= 0 || pagedelta <= 0) return 1;
-    if (maxchunk > pagesize) return 1;
-
-    // free any previously allocated pages
-
-    for (int i = 0; i < npage; i++) free(pages[i]);
-    free(pages);
-
-    // initial page allocation
-
-    ndatum = nchunk = 0;
-    pages = NULL;
-    npage = 0;
-    allocate();
-    if (errorflag) return 2;
-    ipage = index = 0;
-    page = pages[ipage];
-    return 0;
-  }
-
-  // free all allocated pages
-
-  ~MyPage() {
-    for (int i = 0; i < npage; i++) free(pages[i]);
-    free(pages);
-  }
-
-  // get ptr to one datum
-  // return NULL if run out of memory
-
-  T *get() {
-    ndatum++;
-    nchunk++;
-    if (index < pagesize) return &page[index++];
-    ipage++;
-    if (ipage == npage) {
-      allocate();
-      if (errorflag) return NULL;
-    }
-    page = pages[ipage];
-    index = 0;
-    return &page[index++];
-  }
-
-  // get ptr to location that can store N datums
-  // error if N > maxchunk
-  // return NULL if run out of memory
-
-  T *get(int n) {
-    if (n > maxchunk) {
-      errorflag = 1;
-      return NULL;
-    }
-    ndatum += n;
-    nchunk++;
-    if (index+n <= pagesize) {
-      int start = index;
-      index += n;
-      return &page[start];
-    }
-    ipage++;
-    if (ipage == npage) {
-      allocate();
-      if (errorflag) return NULL;
-    }
-    page = pages[ipage];
-    index = n;
-    return &page[0];
-  }
-
-  // get ptr to location that can store maxchunk datums
-  // will return same ptr as previous call if vgot() not called
-  // return NULL if run out of memory
+  /** Get pointer to location that can store *maxchunk* items.
+   *
+   * This will return the same pointer as the previous call to
+   * this function unless vgot() is called afterwards to record
+   * how many items of the chunk were actually used.
+   *
+   * \return pointer to chunk of memory or null pointer if run out of memory */

  T *vget() {
    if (index+maxchunk <= pagesize) return &page[index];
@ -163,9 +60,14 @@ class MyPage {
    return &page[index];
  }

-  // increment by N = # of values stored in loc returned by vget()
-  // OK to not call if vget() ptr was not used
-  // error if N > maxchunk
+  /** Mark *N* items of the chunk reserved with a preceding call to vget() as used.
+   *
+   * This will advance the internal pointer inside the current memory page.
+   * It is not necessary to call this function for *N* = 0, that is the reserved
+   * storage was not used.  A following call to vget() will then reserve the
+   * same location again.  It is an error if *N* > *maxchunk*.
+   *
+   * \param  n  Number of items used in previously reserved chunk */

  void vgot(int n) {
    if (n > maxchunk) errorflag = 1;
@ -174,25 +76,21 @@ class MyPage {
    index += n;
  }

-  // clear all pages, without freeing any memory
+  void reset();

-  void reset() {
-    ndatum = nchunk = 0;
-    index = ipage = 0;
-    page = pages[ipage];
+  /** Return total size of allocated pages
+   *
+   * \return total storage used in bytes */
+
+  double size() const {
+    return (double)npage*pagesize*sizeof(T);
  }

-  // return total size of allocated pages
+  /** Return error status
+   *
+   * \return 0 if no error, 1 if requested chunk size > maxchunk, 2 if malloc failed */

-  int size() const {
-    return npage*pagesize*sizeof(T);
-  }
-
-  // return error status
-
-  int status() const {
-    return errorflag;
-  }
+  int status() const { return errorflag; }

 private:
  T **pages;      // list of allocated pages
@ -208,27 +106,8 @@ class MyPage {
  int errorflag;  // flag > 0 if error has occurred
                  // 1 = chunk size exceeded maxchunk
                  // 2 = memory allocation error
-
-  void allocate() {
-    npage += pagedelta;
-    pages = (T **) realloc(pages,npage*sizeof(T *));
-    if (!pages) {
-      errorflag = 2;
-      return;
-    }
-
-    for (int i = npage-pagedelta; i < npage; i++) {
-#if defined(LAMMPS_MEMALIGN)
-      void *ptr;
-      if (posix_memalign(&ptr, LAMMPS_MEMALIGN, pagesize*sizeof(T)))
-        errorflag = 2;
-      pages[i] = (T *) ptr;
-#else
-      pages[i] = (T *) malloc(pagesize*sizeof(T));
-      if (!pages[i]) errorflag = 2;
-#endif
-    }
-  }
+  void allocate();
+  void deallocate();
 };

 }
--- a/src/my_pool_chunk.cpp
+++ b/src/my_pool_chunk.cpp
@ -0,0 +1,228 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+   ------------------------------------------------------------------------- */
+
+#include "my_pool_chunk.h"
+
+#include <cstdlib>
+#include <cstdio>
+
+#if defined(LMP_USER_INTEL) && !defined(LAMMPS_MEMALIGN) && !defined(_WIN32)
+#define LAMMPS_MEMALIGN 64
+#endif
+
+using namespace LAMMPS_NS;
+
+/** \class LAMMPS_NS::MyPoolChunk
+ *  \brief Templated class for storing chunks of datums in pages
+ *
+ * The size of the chunk may vary from call to call between the
+ * *minchunk* and *maxchunk* setting.  Chunks may be returned
+ * to the pool for re-use.  Chunks can be reserved in *nbin*
+ * different sizes between *minchunk* and *maxchunk*.
+ * The *chunksperpage* setting specifies how many chunks are stored
+ * on any page and the *pagedelta* setting determines how many
+ * pages are allocated in one go.  Pages are never freed, so they
+ * can be re-used without re-allocation.
+ *
+ * \note
+ * This is a template class with explicit instantiation. If the class
+ * is used with a new data type a new explicit instantiation may need
+ * to be added at the end of the file ``src/my_pool_chunk.cpp`` to
+ * avoid symbol lookup errors. */
+
+/** Create a class instance and set memory pool parameters
+ *
+ * \param  user_minchunk      Minimal chunk size
+ * \param  user_maxchunk      Maximal chunk size
+ * \param  user_nbin          Number of bins of different chunk sizes
+ * \param  user_chunkperpage  Number of chunks per page
+ * \param  user_pagedelta     Number of pages to allocate in one go */
+
+template <class T>
+MyPoolChunk<T>::MyPoolChunk(int user_minchunk, int user_maxchunk, int user_nbin,
+                            int user_chunkperpage, int user_pagedelta) {
+  minchunk = user_minchunk;
+  maxchunk = user_maxchunk;
+  nbin = user_nbin;
+  chunkperpage = user_chunkperpage;
+  pagedelta = user_pagedelta;
+
+  // give every member a defined value before any early return, because
+  // the destructor reads npage and releases all of these pointers
+  ndatum = nchunk = npage = 0;
+  binsize = 1;
+  pages = nullptr;
+  freelist = whichbin = freehead = chunksize = nullptr;
+
+  // reject invalid settings before new int[nbin] can see a bad array size
+  errorflag = 0;
+  if (minchunk <= 0 || minchunk > maxchunk) errorflag = 1;
+  if (user_nbin <= 0 || chunkperpage <= 0 || pagedelta <= 0) errorflag = 1;
+  if (errorflag) return;
+
+  freehead = new int[nbin];
+  chunksize = new int[nbin];
+
+  // ensure nbin*binsize spans minchunk to maxchunk inclusive
+  binsize = (maxchunk-minchunk+1) / nbin;
+  if (minchunk + nbin*binsize <= maxchunk) binsize++;
+
+  for (int ibin = 0; ibin < nbin; ibin++) {
+    freehead[ibin] = -1;
+    chunksize[ibin] = minchunk + (ibin+1)*binsize - 1;
+    if (chunksize[ibin] > maxchunk) chunksize[ibin] = maxchunk;
+  }
+}
+
+/** Destroy class instance and free all allocated memory */
+template <class T>
+MyPoolChunk<T>::~MyPoolChunk() {
+  delete [] freehead;
+  delete [] chunksize;
+  if (npage) {   // the malloc()-based tables are only released once pages exist
+    free(freelist);
+    for (int i = 0; i < npage; i++) free(pages[i]);
+    free(pages);
+    free(whichbin);
+  }
+}
+
+/** Return pointer/index of unused chunk of size maxchunk
+ *
+ * \param  index  Index of chunk in memory pool
+ * \return        Pointer to requested chunk of storage */
+
+template <class T>
+T *MyPoolChunk<T>::get(int &index) {
+  const int lastbin = nbin-1;   // bin whose chunks have size maxchunk
+  // free list of that bin is empty: add pages, give up if that fails
+  if (freehead[lastbin] < 0) {
+    allocate(lastbin);
+    if (errorflag) {
+      index = -1;
+      return nullptr;
+    }
+  }
+  // pop the head of the free list and convert its index to an address
+  index = freehead[lastbin];
+  freehead[lastbin] = freelist[index];
+  ++nchunk;
+  ndatum += maxchunk;
+  T *pagestart = pages[index/chunkperpage];
+  return pagestart + (index % chunkperpage)*chunksize[lastbin];
+}
+
+/** Return pointer/index of unused chunk of size N
+ *
+ * \param  n      Size of chunk
+ * \param  index  Index of chunk in memory pool
+ * \return        Pointer to requested chunk of storage */
+
+template <class T>
+T *MyPoolChunk<T>::get(int n, int &index) {
+  // only sizes inside [minchunk,maxchunk] can be served
+  if (!((n >= minchunk) && (n <= maxchunk))) {
+    errorflag = 3;
+    index = -1;
+    return nullptr;
+  }
+  // pick the bin for this size; add pages if its free list is empty
+  const int mybin = (n-minchunk) / binsize;
+  if (freehead[mybin] < 0) {
+    allocate(mybin);
+    if (errorflag) {
+      index = -1;
+      return nullptr;
+    }
+  }
+  // pop the head of the free list and convert its index to an address
+  index = freehead[mybin];
+  freehead[mybin] = freelist[index];
+  ++nchunk;
+  ndatum += n;
+  T *pagestart = pages[index/chunkperpage];
+  return pagestart + (index % chunkperpage)*chunksize[mybin];
+}
+
+/** Put indexed chunk back into memory pool via free list
+ *
+ * \param index  Memory chunk index returned by call to get() */
+
+template <class T>
+void MyPoolChunk<T>::put(int index) {
+  if (index >= 0) {   // a negative index means no chunk was allocated
+    const int mybin = whichbin[index/chunkperpage];
+    ndatum -= chunksize[mybin];
+    --nchunk;
+    freelist[index] = freehead[mybin];   // push chunk onto its bin's free list
+    freehead[mybin] = index;
+  }
+}
+
+
+template <class T>
+void MyPoolChunk<T>::allocate(int ibin) {
+  const int oldpage = npage;
+  const int newpage = npage + pagedelta;
+  // grow each table through a temporary: assigning the result of realloc()
+  // directly would lose (and leak) the old table on failure
+  int *newlist = (int *) realloc(freelist,newpage*chunkperpage*sizeof(int));
+  if (newlist) freelist = newlist;
+  T **newpages = (T **) realloc(pages,newpage*sizeof(T *));
+  if (newpages) pages = newpages;
+  int *newbin = (int *) realloc(whichbin,newpage*sizeof(int));
+  if (newbin) whichbin = newbin;
+  if (!newlist || !newpages || !newbin) { errorflag = 2; return; }
+  npage = newpage;
+
+  // allocate pages with appropriate chunksize for ibin
+  for (int i = oldpage; i < npage; i++) {
+    whichbin[i] = ibin;
+#if defined(LAMMPS_MEMALIGN)
+    void *ptr = nullptr;  // posix_memalign() may leave ptr unset on failure
+    if (posix_memalign(&ptr, LAMMPS_MEMALIGN,
+                       chunkperpage*chunksize[ibin]*sizeof(T)))
+      errorflag = 2;
+    pages[i] = (T *) ptr;
+#else
+    pages[i] = (T *) malloc(chunkperpage*chunksize[ibin]*sizeof(T));
+    if (!pages[i]) errorflag = 2;
+#endif
+  }
+  // reset free list for unused chunks on new pages
+  freehead[ibin] = oldpage*chunkperpage;
+  for (int i = freehead[ibin]; i < npage*chunkperpage; i++) freelist[i] = i+1;
+  freelist[npage*chunkperpage-1] = -1;
+}
+
+/** Return total size of allocated pages
+ *
+ * \return total storage used in bytes */
+
+template <class T>
+double MyPoolChunk<T>::size() const {
+  double bytes = (double)npage*chunkperpage*sizeof(int);   // freelist
+  bytes += npage*sizeof(T *);                              // pages
+  bytes += npage*sizeof(int);                              // whichbin
+  // chunksize[] is indexed by bin, not by page: look up the bin of each page
+  for (int i=0; i < npage; ++i)
+    bytes += (double)chunkperpage*chunksize[whichbin[i]]*sizeof(T);
+  return bytes;
+}
+
+// explicit instantiations: one line is needed for every type T used with MyPoolChunk
+
+namespace LAMMPS_NS {
+  template class MyPoolChunk<int>;
+  template class MyPoolChunk<double>;
+}
--- a/src/my_pool_chunk.h
+++ b/src/my_pool_chunk.h
@ -9,46 +9,11 @@
   the GNU General Public License.

   See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
-
-/* ----------------------------------------------------------------------
-MyPoolChunk = templated class for storing chunks of datums in pages
-  chunks can be returned to pool for reuse
-  chunks come in nbin different fixed sizes so can reuse
-  replaces many small mallocs with a few large mallocs
-  pages are never freed, so can reuse w/out reallocs
-usage:
-  continuously get() and put() chunks as needed
-  NOTE: could add a clear() if retain info on mapping of pages to bins
-inputs:
-   template T = one datum, e.g. int, double, struct
-   minchunk = min # of datums in one chunk, def = 1
-   maxchunk = max # of datums in one chunk, def = 1
-   nbin = # of bins between minchunk and maxchunk
-   chunkperpage = # of chunks in one page, def = 1024
-   pagedelta = # of pages to allocate at a time, def = 1
-methods:
-   T *get(index) = return ptr/index to unused chunk of size maxchunk
-   T *get(N,index) = return ptr/index to unused chunk of size N
-                     minchunk <= N <= maxchunk required
-   put(index) = return indexed chunk to pool (same index returned by get)
-   int size() = return total size of allocated pages in bytes
-public variables:
-   ndatum = total # of stored datums
-   nchunk = total # of stored chunks
-   size = total size of all allocated pages in daums
-   errorflag = flag for various error conditions
------------------------------------------------------------------------- */
+   ------------------------------------------------------------------------- */

 #ifndef LAMMPS_MY_POOL_CHUNK_H
 #define LAMMPS_MY_POOL_CHUNK_H

-#if defined(LMP_USER_INTEL) && !defined(LAMMPS_MEMALIGN) && !defined(_WIN32)
-#define LAMMPS_MEMALIGN 64
-#endif
-
-#include <cstdlib>
-
 namespace LAMMPS_NS {

 template<class T>
@ -56,113 +21,36 @@ class MyPoolChunk {
 public:
  int ndatum;      // total # of stored datums
  int nchunk;      // total # of stored chunks
-  int size;        // total size of all allocated pages in datums
-  int errorflag;   // flag > 1 if error has occurred
-                   // 1 = invalid inputs
-                   // 2 = memory allocation error
-                   // 3 = chunk size exceeded maxchunk

  MyPoolChunk(int user_minchunk = 1, int user_maxchunk = 1, int user_nbin = 1,
-              int user_chunkperpage = 1024, int user_pagedelta = 1) {
-    minchunk = user_minchunk;
-    maxchunk = user_maxchunk;
-    nbin = user_nbin;
-    chunkperpage = user_chunkperpage;
-    pagedelta = user_pagedelta;
-
-    errorflag = 0;
-    if (minchunk <= 0 || minchunk > maxchunk) errorflag = 1;
-    if (user_nbin <= 0 || chunkperpage <= 0 || pagedelta <= 0) errorflag = 1;
-
-    freehead = new int[nbin];
-    chunksize = new int[nbin];
-    if (!freehead || !chunksize) errorflag = 1;
-    if (errorflag) return;
-
-    // insure nbin*binsize spans minchunk to maxchunk inclusive
-
-    binsize = (maxchunk-minchunk+1) / nbin;
-    if (minchunk + nbin*binsize <= maxchunk) binsize++;
-
-    freelist = NULL;
-    for (int ibin = 0; ibin < nbin; ibin++) {
-      freehead[ibin] = -1;
-      chunksize[ibin] = minchunk + (ibin+1)*binsize - 1;
-      if (chunksize[ibin] > maxchunk) chunksize[ibin] = maxchunk;
-    }
-
-    ndatum = nchunk = size = 0;
-    pages = NULL;
-    whichbin = NULL;
-    npage = 0;
-  }
+              int user_chunkperpage = 1024, int user_pagedelta = 1);

  // free all allocated memory

-  ~MyPoolChunk() {
-    delete [] freehead;
-    delete [] chunksize;
-    if (npage) {
-      free(freelist);
-      for (int i = 0; i < npage; i++) free(pages[i]);
-      free(pages);
-      free(whichbin);
-    }
-  }
+  ~MyPoolChunk();

  // return pointer/index of unused chunk of size maxchunk

-  T *get(int &index) {
-    int ibin = nbin-1;
-    if (freehead[ibin] < 0) {
-      allocate(ibin);
-      if (errorflag) return NULL;
-    }
-
-    ndatum += maxchunk;
-    nchunk++;
-    index = freehead[ibin];
-    int ipage = index/chunkperpage;
-    int ientry = index % chunkperpage;
-    freehead[ibin] = freelist[index];
-    return &pages[ipage][ientry*chunksize[ibin]];
-  }
+  T *get(int &index);

  // return pointer/index of unused chunk of size N

-  T *get(int n, int &index) {
-    if (n < minchunk || n > maxchunk) {
-      errorflag = 3;
-      return NULL;
-    }
-
-    int ibin = (n-minchunk) / binsize;
-    if (freehead[ibin] < 0) {
-      allocate(ibin);
-      if (errorflag) return NULL;
-    }
-
-    ndatum += n;
-    nchunk++;
-    index = freehead[ibin];
-    int ipage = index/chunkperpage;
-    int ientry = index % chunkperpage;
-    freehead[ibin] = freelist[index];
-    return &pages[ipage][ientry*chunksize[ibin]];
-  }
+  T *get(int n, int &index);

  // return indexed chunk to pool via free list
  // index = -1 if no allocated chunk

-  void put(int index) {
-    if (index < 0) return;
-    int ipage = index/chunkperpage;
-    int ibin = whichbin[ipage];
-    nchunk--;
-    ndatum -= chunksize[ibin];
-    freelist[index] = freehead[ibin];
-    freehead[ibin] = index;
-  }
+  void put(int index);
+
+  // total memory used in bytes
+
+  double size() const;
+
+  /** Return error status
+   *
+   * \return 0 if no error, 1 if invalid input, 2 if malloc() failed, 3 if chunk > maxchunk */
+
+  int status() const { return errorflag; }

 private:
  int minchunk;       // min # of datums per chunk
@ -171,6 +59,10 @@ class MyPoolChunk {
  int chunkperpage;   // # of chunks on every page, regardless of which bin
  int pagedelta;      // # of pages to allocate at once, default = 1
  int binsize;        // delta in chunk sizes between adjacent bins
+  int errorflag;      // flag > 0 if error has occurred
+                      // 1 = invalid inputs
+                      // 2 = memory allocation error
+                      // 3 = chunk size exceeded maxchunk

  T **pages;          // list of allocated pages
  int *whichbin;      // which bin each page belongs to
@ -179,42 +71,7 @@ class MyPoolChunk {
  int *freehead;      // index of first unused chunk in each bin
  int *chunksize;     // size of chunks in each bin

-  void allocate(int ibin) {
-    int oldpage = npage;
-    npage += pagedelta;
-    freelist = (int *) realloc(freelist,npage*chunkperpage*sizeof(int));
-    pages = (T **) realloc(pages,npage*sizeof(T *));
-    whichbin = (int *) realloc(whichbin,npage*sizeof(int));
-    if (!freelist || !pages) {
-      errorflag = 2;
-      return;
-    }
-
-    // allocate pages with appropriate chunksize for ibin
-
-    for (int i = oldpage; i < npage; i++) {
-      whichbin[i] = ibin;
-#if defined(LAMMPS_MEMALIGN)
-      void *ptr;
-      if (posix_memalign(&ptr, LAMMPS_MEMALIGN,
-                         chunkperpage*chunksize[ibin]*sizeof(T)))
-        errorflag = 2;
-      pages[i] = (T *) ptr;
-#else
-      pages[i] = (T *) malloc(chunkperpage*chunksize[ibin]*sizeof(T));
-      size += chunkperpage*chunksize[ibin];
-      if (!pages[i]) errorflag = 2;
-#endif
-    }
-
-    // reset free list for unused chunks on new pages
-
-    freehead[ibin] = oldpage*chunkperpage;
-    for (int i = freehead[ibin]; i < npage*chunkperpage; i++) freelist[i] = i+1;
-    freelist[npage*chunkperpage-1] = -1;
-  }
+  void allocate(int ibin);
 };
-
 }
-
 #endif
--- a/src/reset_atom_ids.cpp
+++ b/src/reset_atom_ids.cpp
@ -34,7 +34,6 @@ using namespace LAMMPS_NS;
 ResetIDs::AtomRvous *ResetIDs::sortrvous;
 static int compare_coords(const void *, const void *);
 #else
-#include "mergesort.h"
 // prototype for non-class function
 static int compare_coords(const int, const int, void *);
 #endif
@ -509,7 +508,7 @@ int ResetIDs::sort_bins(int n, char *inbuf,
    sortrvous = in;
    qsort(order,count[ibin],sizeof(int),compare_coords);
 #else
-    merge_sort(order,count[ibin],(void *) in,compare_coords);
+    utils::merge_sort(order,count[ibin],(void *) in,compare_coords);
 #endif

    head[ibin] = last[ibin] = -1;
--- a/src/utils.cpp
+++ b/src/utils.cpp
@ -71,6 +71,16 @@ extern "C"
  static int  re_match(const char *text, const char *pattern);
 }

+////////////////////////////////////////////////////////////////////////
+// Merge sort support functions
+
+static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi,
+                     void *ptr, int (*comp)(int, int, void *));
+static void insertion_sort(int *index, int num, void *ptr,
+                           int (*comp)(int, int, void*));
+
+////////////////////////////////////////////////////////////////////////
+
 using namespace LAMMPS_NS;

 /** More flexible and specific matching of a string against a pattern.
@ -1011,6 +1021,113 @@ int utils::date2num(const std::string &date)
  return num;
 }

+/* ----------------------------------------------------------------------
+ * Merge sort part 1: Loop over sublists doubling in size with each iteration.
+ * Pre-sort small sublists with insertion sort for better overall performance.
+------------------------------------------------------------------------- */
+
+void utils::merge_sort(int *index, int num, void *ptr,
+                       int (*comp)(int, int, void *))
+{
+  if (num < 2) return;   // zero or one element: nothing to sort
+
+  int chunk,i,j;
+
+  // do insertion sort on chunks of up to 64 elements
+
+  chunk = 64;
+  for (i=0; i < num; i += chunk) {
+    j = (i+chunk > num) ? num-i : chunk;   // the last chunk may be shorter
+    insertion_sort(index+i,j,ptr,comp);
+  }
+
+  // already done? (the whole list fit into a single chunk)
+
+  if (chunk >= num) return;
+
+  // continue with merge sort on the pre-sorted chunks.
+  // we need an extra buffer for temporary storage and two
+  // pointers to operate on, so we can swap the pointers
+  // rather than copying to the hold buffer in each pass
+
+  int *buf = new int[num];
+  int *dest = index;
+  int *hold = buf;
+
+  while (chunk < num) {
+    int m;
+
+    // swap hold and destination buffer
+
+    int *tmp = dest; dest = hold; hold = tmp;
+
+    // merge from hold array to destination array:
+    // [i,m) and [m,j) are adjacent sorted runs, both clamped to num
+    for (i=0; i < num-1; i += 2*chunk) {
+      j = i + 2*chunk;
+      if (j > num) j=num;
+      m = i+chunk;
+      if (m > num) m=num;
+      do_merge(dest,hold,i,m,m,j,ptr,comp);
+    }
+
+    // copy all indices not handled by the chunked merge sort loop
+
+    for ( ; i < num ; i++ ) dest[i] = hold[i];
+    chunk *= 2;   // sorted runs are twice as long in the next pass
+  }
+
+  // if the final sorted data is in buf, copy back to index
+
+  if (dest == buf) memcpy(index,buf,sizeof(int)*num);
+
+  delete[] buf;
+}
+
+/* ------------------------------------------------------------------ */
+
+/* ----------------------------------------------------------------------
+ * Merge sort part 2: Insertion sort for pre-sorting of small chunks
+------------------------------------------------------------------------- */
+
+static void insertion_sort(int *index, int num, void *ptr,
+                           int (*comp)(int, int, void*))
+{
+  // sorts index[0..num-1] in place, ascending with respect to comp()
+  if (num < 2) return;
+  for (int i=1; i < num; ++i) {
+    const int tmp = index[i];
+
+    // shift larger entries up by one slot until the place for tmp is found
+    int j = i-1;
+    while ((j >= 0) && ((*comp)(index[j],tmp,ptr) > 0)) {
+      index[j+1] = index[j];
+      --j;
+    }
+    index[j+1] = tmp;
+  }
+}
+
+/* ----------------------------------------------------------------------
+ * Merge sort part 3: Merge two sublists
+------------------------------------------------------------------------- */
+
+static void do_merge(int *idx, int *buf, int llo, int lhi, int rlo, int rhi,
+                     void *ptr, int (*comp)(int, int, void *))
+{
+  int out = llo;     // next slot to fill in idx
+  int left = llo;    // cursor in buf[llo..lhi-1]
+  int right = rlo;   // cursor in buf[rlo..rhi-1]
+  // take from the left run only when it compares strictly smaller
+  for ( ; (left < lhi) && (right < rhi); ++out) {
+    const bool takeleft = (*comp)(buf[left],buf[right],ptr) < 0;
+    idx[out] = takeleft ? buf[left++] : buf[right++];
+  }
+  // one run is exhausted: append whatever is left of the other
+  for ( ; left < lhi; ++left, ++out) idx[out] = buf[left];
+  for ( ; right < rhi; ++right, ++out) idx[out] = buf[right];
+}
+
 /* ------------------------------------------------------------------ */

 extern "C" {
--- a/src/utils.h
+++ b/src/utils.h
@ -33,23 +33,23 @@ namespace LAMMPS_NS {
     *
     *  \param text the text to be matched against the pattern
     *  \param pattern the search pattern, which may contain regexp markers
-     *  \return true if the pattern matches, false if not
-     */
+     *  \return true if the pattern matches, false if not */
+
    bool strmatch(const std::string &text, const std::string &pattern);

    /** Send message to screen and logfile, if available
     *
     *  \param lmp   pointer to LAMMPS class instance
-     *  \param mesg  message to be printed
-     */
+     *  \param mesg  message to be printed */
+
    void logmesg(LAMMPS *lmp, const std::string &mesg);

    /** return a string representing the current system error status
     *
     *  This is a wrapper around calling strerror(errno).
     *
-     *  \return  error string
-     */
+     *  \return  error string */
+
    std::string getsyserror();

    /** safe wrapper around fgets() which aborts on errors
@ -61,8 +61,8 @@ namespace LAMMPS_NS {
     *  \param size     size of buffer s (max number of bytes read by fgets())
     *  \param fp       file pointer used by fgets()
     *  \param filename file name associated with fp (may be NULL; then LAMMPS will try to detect)
-     *  \param error    pointer to Error class instance (for abort)
-     */
+     *  \param error    pointer to Error class instance (for abort) */
+
    void sfgets(const char *srcname, int srcline, char *s, int size,
                FILE *fp, const char *filename, Error *error);

@ -76,8 +76,8 @@ namespace LAMMPS_NS {
     *  \param num      number of data elements read by fread()
     *  \param fp       file pointer used by fread()
     *  \param filename file name associated with fp (may be NULL; then LAMMPS will try to detect)
-     *  \param error    pointer to Error class instance (for abort)
-     */
+     *  \param error    pointer to Error class instance (for abort) */
+
    void sfread(const char *srcname, int srcline, void *s, size_t size,
                size_t num, FILE *fp, const char *filename, Error *error);

@ -86,8 +86,8 @@ namespace LAMMPS_NS {
     *  \param style type of style that is to be checked for
     *  \param name  name of style that was not found
     *  \param lmp   pointer to top-level LAMMPS class instance
-     *  \return string usable for error messages
-     */
+     *  \return string usable for error messages */
+
    std::string check_packages_for_style(const std::string &style,
                                         const std::string &name, LAMMPS *lmp);

@ -112,8 +112,8 @@ namespace LAMMPS_NS {
     *  \param str      string to be converted to number
     *  \param do_abort determines whether to call Error::one() or Error::all()
     *  \param lmp      pointer to top-level LAMMPS class instance
-     *  \return         integer number (regular int)
-     */
+     *  \return         integer number (regular int)  */
+
    int inumeric(const char *file, int line, const char *str,
                 bool do_abort, LAMMPS *lmp);

@ -125,8 +125,8 @@ namespace LAMMPS_NS {
     *  \param str      string to be converted to number
     *  \param do_abort determines whether to call Error::one() or Error::all()
     *  \param lmp      pointer to top-level LAMMPS class instance
-     *  \return         integer number (bigint)
-     */
+     *  \return         integer number (bigint) */
+
    bigint bnumeric(const char *file, int line, const char *str,
                    bool do_abort, LAMMPS *lmp);

@ -162,6 +162,7 @@ namespace LAMMPS_NS {
     * \param nlo      lower bound
     * \param nhi      upper bound
     * \param error    pointer to Error class for out-of-bounds messages */
+
    template <typename TYPE>
    void bounds(const char *file, int line, const std::string &str,
                bigint nmin, bigint nmax, TYPE &nlo, TYPE &nhi, Error *error);
@ -197,45 +198,45 @@ namespace LAMMPS_NS {
    /** Trim leading and trailing whitespace. Like TRIM() in Fortran.
     *
     * \param line string that should be trimmed
-     * \return new string without whitespace (string)
-     */
+     * \return new string without whitespace (string) */
+
    std::string trim(const std::string &line);

    /** Return string with anything from '#' onward removed
     *
     * \param line string that should be trimmed
-     * \return new string without comment (string)
-     */
+     * \return new string without comment (string) */
+
    std::string trim_comment(const std::string &line);

    /** Count words in string with custom choice of separating characters
     *
     * \param text string that should be searched
     * \param separators string containing characters that will be treated as whitespace
-     * \return number of words found
-     */
+     * \return number of words found */
+
    size_t count_words(const std::string &text, const std::string &separators);

    /** Count words in string, ignore any whitespace matching " \t\r\n\f"
     *
     * \param text string that should be searched
-     * \return number of words found
-     */
+     * \return number of words found */
+
    size_t count_words(const std::string &text);

    /** Count words in C-string, ignore any whitespace matching " \t\r\n\f"
     *
     * \param text string that should be searched
-     * \return number of words found
-     */
+     * \return number of words found */
+
    size_t count_words(const char *text);

    /** Count words in a single line, trim anything from '#' onward
     *
     * \param text string that should be trimmed and searched
     * \param separators string containing characters that will be treated as whitespace
-     * \return number of words found
-     */
+     * \return number of words found */
+
    size_t trim_and_count_words(const std::string &text, const std::string &separators = " \t\r\n\f");

    /** Take text and split into non-whitespace words.
@ -247,22 +248,22 @@ namespace LAMMPS_NS {
     * Use a tokenizer class for that.
     *
     * \param text string that should be split
-     * \return STL vector with the words
-     */
+     * \return STL vector with the words */
+
    std::vector<std::string> split_words(const std::string &text);

    /** Check if string can be converted to valid integer
     *
     * \param str string that should be checked
-     * \return true, if string contains valid integer, false otherwise
-     */
+     * \return true, if string contains a valid integer, false otherwise */
+
    bool is_integer(const std::string &str);

    /** Check if string can be converted to valid floating-point number
     *
     * \param str string that should be checked
-     * \return true, if string contains valid floating-point number, false otherwise
-     */
+     * \return true, if string contains valid number, false otherwise */
+
    bool is_double(const std::string &str);

    /** Try to detect pathname from FILE pointer.
@ -272,55 +273,60 @@ namespace LAMMPS_NS {
     *  \param buf  storage buffer for pathname. output will be truncated if not large enough
     *  \param len  size of storage buffer. output will be truncated to this length - 1
     *  \param fp   FILE pointer struct from STDIO library for which we want to detect the name
-     *  \return pointer to the storage buffer, i.e. buf
-     */
+     *  \return pointer to the storage buffer, i.e. buf */
+
    const char *guesspath(char *buf, int len, FILE *fp);

    /** Strip off leading part of path, return just the filename
     *
     * \param path file path
-     * \return file name
-     */
+     * \return file name */
+
    std::string path_basename(const std::string &path);

-    /**
-     * \brief Join two paths
-     * \param a first path
-     * \param b second path
-     * \return combined path
-     */
+    /** Join two pathname segments
+     *
+     * This uses the forward slash '/' character unless LAMMPS is compiled
+     * for Windows where it uses the equivalent backward slash '\\'.
+     *
+     * \param   a  first path
+     * \param   b  second path
+     * \return     combined path */
+
    std::string path_join(const std::string &a, const std::string &b);

-    /**
-     * \brief Check if file exists and is readable
+    /** Check if file exists and is readable
+     *
     * \param path file path
-     * \return true if file exists and is readable
-     */
+     * \return true if file exists and is readable */
+
    bool file_is_readable(const std::string &path);

    /** Determine full path of potential file. If file is not found in current directory,
     *  search directories listed in LAMMPS_POTENTIALS environment variable
     *
     * \param path file path
-     * \return full path to potential file
-     */
+     * \return full path to potential file */
+
    std::string get_potential_file_path(const std::string &path);

    /** Read potential file and return DATE field if it is present
     *
     * \param path file path
     * \param potential_name name of potential that is being read
-     * \return DATE field if present
-     */
-    std::string get_potential_date(const std::string &path, const std::string &potential_name);
+     * \return DATE field if present */
+
+    std::string get_potential_date(const std::string &path,
+                                   const std::string &potential_name);

    /** Read potential file and return UNITS field if it is present
     *
     * \param path file path
     * \param potential_name name of potential that is being read
-     * \return UNITS field if present
-     */
-    std::string get_potential_units(const std::string &path, const std::string &potential_name);
+     * \return UNITS field if present */
+
+    std::string get_potential_units(const std::string &path,
+                                    const std::string &potential_name);

    enum { NOCONVERT = 0, METAL2REAL = 1, REAL2METAL = 1<<1 };
    enum { UNKNOWN = 0, ENERGY };
@ -328,16 +334,15 @@ namespace LAMMPS_NS {
    /** Return bitmask of available conversion factors for a given property
     *
     * \param property property to be converted
-     * \return bitmask indicating available conversions
-     */
+     * \return bitmask indicating available conversions */
    int get_supported_conversions(const int property);

    /** Return unit conversion factor for given property and selected from/to units
     *
     * \param property property to be converted
     * \param conversion constant indicating the conversion
-     * \return conversion factor
-     */
+     * \return conversion factor */
+
    double get_conversion_factor(const int property, const int conversion);

    /** Open a potential file as specified by *name*
@ -368,8 +373,8 @@ namespace LAMMPS_NS {
     * The strings "off" and "unlimited" result in -1
     *
     * \param timespec a string in the following format: ([[HH:]MM:]SS)
-     * \return total in seconds
-     */
+     * \return total in seconds */
+
    double timespec2seconds(const std::string &timespec);

    /** Convert a LAMMPS version date to a number
@ -386,9 +391,26 @@ namespace LAMMPS_NS {
     * No check is made whether the date is valid.
     *
     * \param  date  string in the format (Day Month Year)
-     * \return       date code
-     */
+     * \return       date code */
    int date2num(const std::string &date);
+
+    /** Custom merge sort implementation
+     *
+     * This function provides a custom upward hybrid merge sort
+     * implementation with support to pass an opaque pointer to
+     * the comparison function, e.g. for access to class members.
+     * This avoids having to use global variables.  For improved
+     * performance, it uses an in-place insertion sort on initial
+     * chunks of up to 64 elements and switches to merge sort from
+     * then on.
+     *
+     * \param  index  Array with indices to be sorted
+     * \param  num    Length of the index array
+     * \param  ptr    Pointer to opaque object passed to comparison function
+     * \param  comp   Pointer to comparison function */
+
+    void merge_sort(int *index, int num, void *ptr,
+                    int (*comp)(int, int, void *));
  }
 }

--- a/tools/singularity/centos7.def
+++ b/tools/singularity/centos7.def
@ -11,7 +11,7 @@ From: centos:7
            hdf5-devel python36-virtualenv python36-pip python-pip \
            netcdf-devel netcdf-cxx-devel netcdf-mpich-devel netcdf-openmpi-devel \
            python-virtualenv fftw-devel voro++-devel eigen3-devel gsl-devel openblas-devel enchant \
-            blas-devel lapack-devel libyaml-devel openkim-models kim-api-devel
+            blas-devel lapack-devel libyaml-devel openkim-models kim-api-devel zstd libzstd-devel
        yum clean all

        # we need to reset any module variables
@ -36,7 +36,7 @@ From: centos:7
        # manually install Plumed
        mkdir plumed
        cd plumed
-        version=2.6.0
+        version=2.6.1
        curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${version}/plumed-src-${version}.tgz
        tar -xzf plumed.tar.gz
        cd plumed-${version}
--- a/tools/singularity/centos8.def
+++ b/tools/singularity/centos8.def
@ -16,7 +16,8 @@ From: centos:8
               texlive-latex-bin texlive-lualatex-math texlive-fncychap texlive-tabulary \
               texlive-framed texlive-wrapfig texlive-upquote texlive-capt-of \
               texlive-needspace texlive-titlesec texlive-anysize texlive-dvipng \
-               blas-devel lapack-devel libyaml-devel openkim-models kim-api-devel
+               blas-devel lapack-devel libyaml-devel openkim-models kim-api-devel \
+               zstd libzstd-devel
        dnf clean all

        # we need to reset any module variables
@ -41,7 +42,7 @@ From: centos:8
        # manually install Plumed
        mkdir plumed
        cd plumed
-        version=2.6.0
+        version=2.6.1
        curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${version}/plumed-src-${version}.tgz
        tar -xzf plumed.tar.gz
        cd plumed-${version}
--- a/tools/singularity/fedora32_mingw.def
+++ b/tools/singularity/fedora32_mingw.def
@ -36,7 +36,8 @@ From: fedora:32
               texlive-latex-bin texlive-lualatex-math texlive-fncychap texlive-tabulary \
               texlive-framed texlive-wrapfig texlive-upquote texlive-capt-of \
               texlive-needspace texlive-titlesec texlive-anysize texlive-dvipng \
-               blas-devel lapack-devel libyaml-devel openkim-models kim-api-devel
+               blas-devel lapack-devel libyaml-devel openkim-models kim-api-devel \
+               zstd libzstd-devel
        dnf clean all

        # enable Lmod and load MPI
@ -47,7 +48,7 @@ From: fedora:32
        # manually install Plumed
        mkdir plumed
        cd plumed
-        version=2.6.0
+        version=2.6.1
        curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${version}/plumed-src-${version}.tgz
        tar -xzf plumed.tar.gz
        cd plumed-${version}
--- a/tools/singularity/ubuntu18.04.def
+++ b/tools/singularity/ubuntu18.04.def
@ -41,6 +41,7 @@ From: ubuntu:18.04
        libproj-dev \
        libvtk6-dev \
        libyaml-dev \
+        libzstd-dev \
        make \
        mpi-default-bin \
        mpi-default-dev \
@ -70,26 +71,54 @@ From: ubuntu:18.04
        xxd \
        valgrind \
        gdb \
+        zstd \
        libkim-api-dev \
        openkim-models

-    # clean cache
-    rm -rf /var/lib/apt/lists/*

-        # manually install Plumed
-        mkdir plumed
-        cd plumed
-        version=2.6.0
-        curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${version}/plumed-src-${version}.tgz
-        tar -xzf plumed.tar.gz
-        cd plumed-${version}
-        ./configure --disable-doc --prefix=/usr
-        make
-        make install
-        # fix up installation for CentOS and Fedora
-        # mv -v /usr/lib/pkgconfig/plumed* /usr/share/pkgconfig/
-        cd ../../
-        rm -rvf plumed
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround for installing files in /usr/share/doc inside of a container
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # install KIM models
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+
+    mkdir plumed
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################

    # set custom prompt indicating the container name
    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
@ -99,6 +128,13 @@ PS1="[ubuntu18.04:\u@\h] \W> "
 EOF
    chmod 755 $CUSTOM_PROMPT_ENV

+
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
 %environment
    LC_ALL=C
    export LC_ALL
--- a/tools/singularity/ubuntu18.04_amd_rocm.def
+++ b/tools/singularity/ubuntu18.04_amd_rocm.def
@ -2,7 +2,7 @@ BootStrap: docker
 From: ubuntu:18.04

 %environment
-    export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+    export PATH=/usr/lib/ccache:${PATH}:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64

 %post
    export DEBIAN_FRONTEND=noninteractive
@ -25,7 +25,8 @@ From: ubuntu:18.04
        build-essential

    apt-get install --no-install-recommends -y software-properties-common
-
+    add-apt-repository ppa:openkim/latest
+    apt-get update
    apt-get install --no-install-recommends -y \
        bc \
        build-essential \
@ -41,7 +42,6 @@ From: ubuntu:18.04
        gfortran \
        git \
        hdf5-tools \
-        kmod \
        less \
        libblas-dev \
        libeigen3-dev \
@ -52,12 +52,15 @@ From: ubuntu:18.04
        libhwloc-dev \
        libjpeg-dev \
        liblapack-dev \
+        libnetcdf-dev \
        libomp-dev \
        libopenblas-dev \
        libnuma-dev \
        libpng-dev \
        libproj-dev \
        libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
        make \
        mpi-default-bin \
        mpi-default-dev \
@ -79,30 +82,85 @@ From: ubuntu:18.04
        wget \
        xxd \
        valgrind \
-        gdb
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models


+    ###########################################################################
+    # ROCm hipCUB
+    ###########################################################################
+
    export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
-    git clone -b master-rocm-3.5 https://github.com/ROCmSoftwarePlatform/hipCUB.git
+    git clone -b rocm-3.7.x https://github.com/ROCmSoftwarePlatform/hipCUB.git
    mkdir hipCUB/build
    cd hipCUB/build
    CXX=hipcc cmake -D BUILD_TEST=off ..
-    make -j4
+    make
    make package
    make install
+    # go back to the directory the build was started from and drop the
+    # clone, so later steps do not run inside hipCUB/build and the build
+    # tree does not stay in the image
+    cd ../..
+    rm -rf hipCUB

-    # clean cache
-    rm -rf /var/lib/apt/lists/*
+
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround for installing files in /usr/share/doc inside of a container
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # install KIM models
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+
+    mkdir plumed
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################

    # set custom prompt indicating the container name
    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
    cat >$CUSTOM_PROMPT_ENV <<EOF
 #!/bin/bash
-PS1="[ubuntu18/rocm:\u@\h] \W> "
+PS1="[ubuntu18.04/rocm:\u@\h] \W> "
 EOF
    chmod 755 $CUSTOM_PROMPT_ENV


+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
 %environment
    LC_ALL=C
    export LC_ALL
--- a/tools/singularity/ubuntu18.04_gpu.def
+++ b/tools/singularity/ubuntu18.04_gpu.def
@ -2,11 +2,11 @@ BootStrap: docker
 From: ubuntu:18.04

 %environment
-    export PATH=/usr/lib/ccache:/usr/local/cuda-10.2/bin:${PATH}:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
-    export CUDADIR=/usr/local/cuda-10.2
-    export CUDA_PATH=/usr/local/cuda-10.2
-    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-10.2/lib64
-    export LIBRARY_PATH=/usr/local/cuda-10.2/lib64/stubs
+    export PATH=/usr/lib/ccache:/usr/local/cuda-11.0/bin:${PATH}:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+    export CUDADIR=/usr/local/cuda-11.0
+    export CUDA_PATH=/usr/local/cuda-11.0
+    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.0/lib64
+    export LIBRARY_PATH=/usr/local/cuda-11.0/lib64/stubs
 %post
    export DEBIAN_FRONTEND=noninteractive
    apt-get update
@ -28,6 +28,8 @@ From: ubuntu:18.04
        build-essential

    apt-get install --no-install-recommends -y software-properties-common
+    add-apt-repository ppa:openkim/latest
+    apt-get update
    apt-get install --no-install-recommends -y \
        bc \
        build-essential \
@ -43,7 +45,6 @@ From: ubuntu:18.04
        gfortran \
        git \
        hdf5-tools \
-        kmod \
        less \
        libblas-dev \
        libeigen3-dev \
@ -54,12 +55,15 @@ From: ubuntu:18.04
        libhwloc-dev \
        libjpeg-dev \
        liblapack-dev \
+        libnetcdf-dev \
        libomp-dev \
        libopenblas-dev \
        libnuma-dev \
        libpng-dev \
        libproj-dev \
        libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
        make \
        mpi-default-bin \
        mpi-default-dev \
@ -81,7 +85,15 @@ From: ubuntu:18.04
        wget \
        xxd \
        valgrind \
-        gdb
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+
+    ###########################################################################
+    # CUDA
+    ###########################################################################

    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
    mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
@ -89,7 +101,7 @@ From: ubuntu:18.04
    add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/ /"
    apt-get update

-    export CUDA_PKG_VERSION=10.2
+    export CUDA_PKG_VERSION=11.0

    apt-get install -y --no-install-recommends \
        cuda-libraries-$CUDA_PKG_VERSION \
@ -97,37 +109,90 @@ From: ubuntu:18.04
        cuda-libraries-dev-$CUDA_PKG_VERSION \
        cuda-minimal-build-$CUDA_PKG_VERSION \
        cuda-compat-$CUDA_PKG_VERSION \
-        libcublas10 \
-        libcublas-dev
-
-    export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
-    git clone -b master-rocm-3.5 https://github.com/ROCmSoftwarePlatform/hipCUB.git
-    mkdir hipCUB/build
-    cd hipCUB/build
-    CXX=hipcc cmake -D BUILD_TEST=off ..
-    make -j4
-    make package
-    make install
+        libcublas-11-0 \
+        libcublas-dev-11-0

    echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf
    echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf

-    # clean cache
-    rm -rf /var/lib/apt/lists/*
-
    # add missing symlink
-    ln -s /usr/local/cuda-10.2 /usr/local/cuda
-    ln -s /usr/local/cuda-10.2/lib64/stubs/libcuda.so /usr/local/cuda-10.2/lib64/stubs/libcuda.so.1
+    ln -s /usr/local/cuda-11.0 /usr/local/cuda
+    ln -s /usr/local/cuda-11.0/lib64/stubs/libcuda.so /usr/local/cuda-11.0/lib64/stubs/libcuda.so.1
+
+
+    ###########################################################################
+    # ROCm hipCUB
+    ###########################################################################
+
+    # make the ROCm tools (e.g. hipcc) visible for the hipCUB build below
+    export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+    git clone -b rocm-3.7.x https://github.com/ROCmSoftwarePlatform/hipCUB.git
+    mkdir hipCUB/build
+    cd hipCUB/build
+    CXX=hipcc cmake -D BUILD_TEST=off ..
+    make
+    make package
+    make install
+    # go back to the directory the build was started from and drop the
+    # clone, so later steps do not run inside hipCUB/build and the build
+    # tree does not stay in the image
+    cd ../..
+    rm -rf hipCUB
+
+
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround for installing files in /usr/share/doc inside of a container
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # install KIM models
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+
+    mkdir plumed
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################

    # set custom prompt indicating the container name
    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
    cat >$CUSTOM_PROMPT_ENV <<EOF
 #!/bin/bash
-PS1="[ubuntu18/gpu:\u@\h] \W> "
+PS1="[ubuntu18.04/gpu:\u@\h] \W> "
 EOF
    chmod 755 $CUSTOM_PROMPT_ENV


+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
 %environment
    LC_ALL=C
    export LC_ALL
--- a/tools/singularity/ubuntu18.04_intel_opencl.def
+++ b/tools/singularity/ubuntu18.04_intel_opencl.def
@ -4,8 +4,10 @@ From: ubuntu:18.04
 %post
    export DEBIAN_FRONTEND=noninteractive
    apt-get update
-    apt-get upgrade --no-install-recommends -y
    apt-get install --no-install-recommends -y software-properties-common
+    add-apt-repository ppa:openkim/latest
+    apt-get update
+    apt-get upgrade --no-install-recommends -y
    apt-get install --no-install-recommends -y \
        bc \
        build-essential \
@ -38,6 +40,8 @@ From: ubuntu:18.04
        libpng-dev \
        libproj-dev \
        libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
        make \
        mpi-default-bin \
        mpi-default-dev \
@ -61,24 +65,79 @@ From: ubuntu:18.04
        wget \
        xxd \
        valgrind \
-        gdb
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+    ###########################################################################
+    # Intel OpenCL
+    ###########################################################################

    add-apt-repository ppa:intel-opencl/intel-opencl
    apt-get update
    apt-get install -y intel-opencl-icd

-    # clean cache
-    rm -rf /var/lib/apt/lists/*
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround for installing files in /usr/share/doc inside of a container
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # install KIM models
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+
+    mkdir plumed
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################

    # set custom prompt indicating the container name
    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
    cat >$CUSTOM_PROMPT_ENV <<EOF
 #!/bin/bash
-PS1="[ubuntu18/intel:\u@\h] \W> "
+PS1="[ubuntu18.04/intel:\u@\h] \W> "
 EOF
    chmod 755 $CUSTOM_PROMPT_ENV


+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
+
 %environment
    LC_ALL=C
    export LC_ALL
--- a/tools/singularity/ubuntu18.04_nvidia.def
+++ b/tools/singularity/ubuntu18.04_nvidia.def
@ -1,11 +1,13 @@
 BootStrap: docker
-From: nvidia/cuda:10.2-devel-ubuntu18.04
+From: nvidia/cuda:11.0-devel-ubuntu18.04

 %post
    export DEBIAN_FRONTEND=noninteractive
    apt-get update
-    apt-get upgrade --no-install-recommends -y
    apt-get install --no-install-recommends -y software-properties-common
+    add-apt-repository ppa:openkim/latest
+    apt-get update
+    apt-get upgrade --no-install-recommends -y
    apt-get install --no-install-recommends -y \
        bc \
        build-essential \
@ -38,6 +40,8 @@ From: nvidia/cuda:10.2-devel-ubuntu18.04
        libpng-dev \
        libproj-dev \
        libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
        make \
        mpi-default-bin \
        mpi-default-dev \
@ -61,23 +65,77 @@ From: nvidia/cuda:10.2-devel-ubuntu18.04
        wget \
        xxd \
        valgrind \
-        gdb
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+    ###########################################################################
+    # NVIDIA OpenCL
+    ###########################################################################

    mkdir -p /etc/OpenCL/vendors
    echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd

-    # clean cache
-    rm -rf /var/lib/apt/lists/*
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround for installing files in /usr/share/doc inside of a container
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # install KIM models
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+
+    mkdir plumed
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################

    # set custom prompt indicating the container name
    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
    cat >$CUSTOM_PROMPT_ENV <<EOF
 #!/bin/bash
-PS1="[ubuntu18/nvidia:\u@\h] \W> "
+PS1="[ubuntu18.04/nvidia:\u@\h] \W> "
 EOF
    chmod 755 $CUSTOM_PROMPT_ENV


+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
 %environment
    LC_ALL=C
    export LC_ALL
--- a/tools/singularity/ubuntu20.04.def
+++ b/tools/singularity/ubuntu20.04.def
@ -41,6 +41,7 @@ From: ubuntu:20.04
        libproj-dev \
        libvtk6-dev \
        libyaml-dev \
+        libzstd-dev \
        make \
        mpi-default-bin \
        mpi-default-dev \
@ -66,26 +67,52 @@ From: ubuntu:20.04
        xxd \
        valgrind \
        gdb \
+        zstd \
        libkim-api-dev \
        openkim-models

-    # clean cache
-    rm -rf /var/lib/apt/lists/*
+    ###########################################################################
+    # KIM-API
+    ###########################################################################

-        # manually install Plumed
-        mkdir plumed
-        cd plumed
-        version=2.6.0
-        curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${version}/plumed-src-${version}.tgz
-        tar -xzf plumed.tar.gz
-        cd plumed-${version}
-        ./configure --disable-doc --prefix=/usr
-        make
-        make install
-        # fix up installation for CentOS and Fedora
-        # mv -v /usr/lib/pkgconfig/plumed* /usr/share/pkgconfig/
-        cd ../../
-        rm -rvf plumed
+    # workaround for installing files in /usr/share/doc inside of a container
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    apt-get install -y libkim-api-doc
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # install KIM models
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    export PLUMED_PKG_VERSION=2.6.1
+
+    mkdir plumed
+    cd plumed
+    curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+    tar -xzf plumed.tar.gz
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --disable-doc --prefix=/usr
+    make
+    make install
+    cd ../../
+    rm -rvf plumed
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################

    # set custom prompt indicating the container name
    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
@ -95,6 +122,13 @@ PS1="[ubuntu20.04:\u@\h] \W> "
 EOF
    chmod 755 $CUSTOM_PROMPT_ENV

+
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    rm -rf /var/lib/apt/lists/*
+
 %environment
    LC_ALL=C
    export LC_ALL
--- a/tools/singularity/ubuntu20.04_amd_rocm.def
+++ b/tools/singularity/ubuntu20.04_amd_rocm.def
@ -0,0 +1,170 @@
+BootStrap: docker
+From: ubuntu:20.04
+
+%environment
+    # ccache wrappers come first and the ROCm tool directories are appended;
+    # no CUDA directory is listed because this definition installs no CUDA toolkit
+    export PATH=/usr/lib/ccache:${PATH}:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+%post
+    # keep apt/debconf from asking questions during the unattended build
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update
+    apt-get upgrade --no-install-recommends -y
+
+    # curl and gnupg are used right below to fetch and register the ROCm repository key
+    apt-get install -y --no-install-recommends curl libnuma-dev gnupg
+
+    # NOTE(review): the signing key is fetched over plain HTTP - confirm whether HTTPS is usable at this point of the build
+    curl -sL http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key | apt-key add -
+    # AMD is using xenial folder also for focal
+    printf "deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main" > /etc/apt/sources.list.d/rocm.list
+
+    # refresh the index so the ROCm repository is visible, then install the ROCm stack
+    apt-get update
+    apt-get install --no-install-recommends -y \
+        kmod \
+        file \
+        sudo \
+        libelf1 \
+        rocm-dev \
+        rocm-libs \
+        build-essential
+
+    # software-properties-common provides add-apt-repository, used to add the OpenKIM PPA
+    # (presumably the source of libkim-api-dev / openkim-models below - confirm)
+    apt-get install --no-install-recommends -y software-properties-common
+    add-apt-repository ppa:openkim/latest
+    apt-get update
+    # general development toolchain and libraries
+    apt-get install --no-install-recommends -y \
+        bc \
+        build-essential \
+        ccache \
+        clang \
+        cmake \
+        cmake-curses-gui \
+        curl \
+        doxygen \
+        enchant \
+        g++ \
+        gcc \
+        gfortran \
+        git \
+        hdf5-tools \
+        less \
+        libblas-dev \
+        libeigen3-dev \
+        libenchant-dev \
+        libfftw3-dev \
+        libgsl-dev \
+        libhdf5-serial-dev \
+        libhwloc-dev \
+        libjpeg-dev \
+        liblapack-dev \
+        libnetcdf-dev \
+        libomp-dev \
+        libopenblas-dev \
+        libnuma-dev \
+        libpng-dev \
+        libproj-dev \
+        libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
+        make \
+        mpi-default-bin \
+        mpi-default-dev \
+        ninja-build \
+        python3-dev \
+        python3-pip \
+        python3-pkg-resources \
+        python3-setuptools \
+        python3-virtualenv \
+        rsync \
+        ssh \
+        vim-nox \
+        virtualenv \
+        voro++-dev \
+        wget \
+        xxd \
+        valgrind \
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+
+    ###########################################################################
+    # ROCm hipCUB
+    ###########################################################################
+
+    # extend PATH with the ROCm tool directories for the build below
+    # (hipcc is presumably found in /opt/rocm/bin)
+    export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+    git clone -b rocm-3.7.x https://github.com/ROCmSoftwarePlatform/hipCUB.git
+    mkdir hipCUB/build
+    cd hipCUB/build
+    CXX=hipcc cmake -D BUILD_TEST=off ..
+    make
+    make package
+    make install
+    # go back to the starting directory and delete the checkout, so that the
+    # following steps do not run inside hipCUB/build and the source/build tree
+    # is not left behind in the image
+    cd ../../
+    rm -rf hipCUB
+
+
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround for installing files in /usr/share/doc inside of a container
+    # 1) comment out the dpkg path-exclude rule for /usr/share/doc
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    # 2) install the documentation package while the rule is disabled
+    apt-get install -y libkim-api-doc
+    # 3) re-enable the rule
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # install KIM models
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    # these two example files are stored gzip-compressed; unpack them in place first
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    # install the example model drivers, portable models and simulator model into the "system" collection
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    # Download the PLUMED release tarball into a scratch directory, build it,
+    # install it under /usr and remove the scratch directory afterwards.
+    export PLUMED_PKG_VERSION=2.6.1
+    plumed_tarball=plumed.tar.gz
+    plumed_url=https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+
+    mkdir plumed
+    cd plumed
+    curl --location --output ${plumed_tarball} ${plumed_url}
+    tar --extract --gzip --file ${plumed_tarball}
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --prefix=/usr --disable-doc
+    make
+    make install
+    cd ../..
+    rm --recursive --force --verbose plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
+
+    # set custom prompt indicating the container name
+    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
+    # write a small script that sets PS1; the EOF terminator has to start at the
+    # beginning of its line, and the body is kept unindented to match
+    cat >$CUSTOM_PROMPT_ENV <<EOF
+#!/bin/bash
+PS1="[ubuntu20.04/rocm:\u@\h] \W> "
+EOF
+    chmod 755 $CUSTOM_PROMPT_ENV
+
+
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    # (the apt package index files fetched by the apt-get update calls above)
+    rm -rf /var/lib/apt/lists/*
+
+%environment
+    # run with the plain "C" locale
+    export LC_ALL=C
+    # tell OpenMPI to not try using Infiniband
+    export OMPI_MCA_btl="^openib"
+    # do not warn about unused components as this messes up testing
+    export OMPI_MCA_btl_base_warn_component_unused="0"
+
+%labels
+    Author rbberger, akohlmey
--- a/tools/singularity/ubuntu20.04_gpu.def
+++ b/tools/singularity/ubuntu20.04_gpu.def
@ -0,0 +1,203 @@
+BootStrap: docker
+From: ubuntu:20.04
+
+%environment
+    # ccache wrappers first, then the CUDA 11.0 tools; the ROCm tool directories are appended
+    export PATH=/usr/lib/ccache:/usr/local/cuda-11.0/bin:${PATH}:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+    export CUDADIR=/usr/local/cuda-11.0
+    export CUDA_PATH=/usr/local/cuda-11.0
+    # append the CUDA library directory; the ${VAR:+...} form avoids a leading ":"
+    # when LD_LIBRARY_PATH is unset or empty (the dynamic loader treats such an
+    # empty entry as the current working directory)
+    export LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/cuda-11.0/lib64
+    # link-time library search path pointing at the CUDA stubs directory
+    export LIBRARY_PATH=/usr/local/cuda-11.0/lib64/stubs
+%post
+    # keep apt/debconf from asking questions during the unattended build
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update
+    apt-get upgrade --no-install-recommends -y
+
+    # curl and gnupg are used right below to fetch and register the ROCm repository key
+    apt-get install -y --no-install-recommends curl libnuma-dev gnupg
+
+    # NOTE(review): the signing key is fetched over plain HTTP - confirm whether HTTPS is usable at this point of the build
+    curl -sL http://repo.radeon.com/rocm/apt/debian/rocm.gpg.key | apt-key add -
+    # AMD is using xenial folder also for focal
+    printf "deb [arch=amd64] http://repo.radeon.com/rocm/apt/debian/ xenial main" > /etc/apt/sources.list.d/rocm.list
+
+    # refresh the index so the ROCm repository is visible, then install the ROCm stack
+    apt-get update
+    apt-get install --no-install-recommends -y \
+        kmod \
+        file \
+        sudo \
+        libelf1 \
+        rocm-dev \
+        rocm-libs \
+        build-essential
+
+    # software-properties-common provides add-apt-repository, used to add the OpenKIM PPA
+    # (presumably the source of libkim-api-dev / openkim-models below - confirm)
+    apt-get install --no-install-recommends -y software-properties-common
+    add-apt-repository ppa:openkim/latest
+    apt-get update
+    # general development toolchain and libraries
+    apt-get install --no-install-recommends -y \
+        bc \
+        build-essential \
+        ccache \
+        clang \
+        cmake \
+        cmake-curses-gui \
+        curl \
+        doxygen \
+        enchant \
+        g++ \
+        gcc \
+        gfortran \
+        git \
+        hdf5-tools \
+        less \
+        libblas-dev \
+        libeigen3-dev \
+        libenchant-dev \
+        libfftw3-dev \
+        libgsl-dev \
+        libhdf5-serial-dev \
+        libhwloc-dev \
+        libjpeg-dev \
+        liblapack-dev \
+        libnetcdf-dev \
+        libomp-dev \
+        libopenblas-dev \
+        libnuma-dev \
+        libpng-dev \
+        libproj-dev \
+        libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
+        make \
+        mpi-default-bin \
+        mpi-default-dev \
+        ninja-build \
+        python3-dev \
+        python3-pip \
+        python3-pkg-resources \
+        python3-setuptools \
+        python3-virtualenv \
+        rsync \
+        ssh \
+        vim-nox \
+        virtualenv \
+        voro++-dev \
+        wget \
+        xxd \
+        valgrind \
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+
+    ###########################################################################
+    # CUDA
+    ###########################################################################
+
+    # register NVIDIA's CUDA apt repository for Ubuntu 20.04:
+    # pin file, repository signing key, then the source entry itself
+    wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/cuda-ubuntu2004.pin
+    mv cuda-ubuntu2004.pin /etc/apt/preferences.d/cuda-repository-pin-600
+    apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub
+    add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /"
+    apt-get update
+
+    # version suffix spelled the way the repository names its packages
+    # (dash-separated, e.g. libcublas-11-0); a dotted value such as 11.0 is not
+    # a literal package name and would only resolve through apt-get's
+    # regular-expression fallback
+    export CUDA_PKG_VERSION=11-0
+
+    apt-get install -y --no-install-recommends \
+        cuda-libraries-$CUDA_PKG_VERSION \
+        cuda-command-line-tools-$CUDA_PKG_VERSION \
+        cuda-libraries-dev-$CUDA_PKG_VERSION \
+        cuda-minimal-build-$CUDA_PKG_VERSION \
+        cuda-compat-$CUDA_PKG_VERSION \
+        libcublas-$CUDA_PKG_VERSION \
+        libcublas-dev-$CUDA_PKG_VERSION
+
+    # append the NVIDIA library directories to the dynamic linker configuration
+    echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf
+    echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf
+
+    # add missing symlink
+    ln -s /usr/local/cuda-11.0 /usr/local/cuda
+    # also expose the libcuda stub under the libcuda.so.1 name
+    ln -s /usr/local/cuda-11.0/lib64/stubs/libcuda.so /usr/local/cuda-11.0/lib64/stubs/libcuda.so.1
+
+
+    ###########################################################################
+    # ROCm hipCUB
+    ###########################################################################
+
+    # extend PATH with the ROCm tool directories for the build below
+    # (hipcc is presumably found in /opt/rocm/bin)
+    export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64
+    git clone -b rocm-3.7.x https://github.com/ROCmSoftwarePlatform/hipCUB.git
+    mkdir hipCUB/build
+    cd hipCUB/build
+    CXX=hipcc cmake -D BUILD_TEST=off ..
+    make
+    make package
+    make install
+    # go back to the starting directory and delete the checkout, so that the
+    # following steps do not run inside hipCUB/build and the source/build tree
+    # is not left behind in the image
+    cd ../../
+    rm -rf hipCUB
+
+
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround for installing files in /usr/share/doc inside of a container
+    # 1) comment out the dpkg path-exclude rule for /usr/share/doc
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    # 2) install the documentation package while the rule is disabled
+    apt-get install -y libkim-api-doc
+    # 3) re-enable the rule
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # install KIM models
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    # these two example files are stored gzip-compressed; unpack them in place first
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    # install the example model drivers, portable models and simulator model into the "system" collection
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    # Download the PLUMED release tarball into a scratch directory, build it,
+    # install it under /usr and remove the scratch directory afterwards.
+    export PLUMED_PKG_VERSION=2.6.1
+    plumed_tarball=plumed.tar.gz
+    plumed_url=https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+
+    mkdir plumed
+    cd plumed
+    curl --location --output ${plumed_tarball} ${plumed_url}
+    tar --extract --gzip --file ${plumed_tarball}
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --prefix=/usr --disable-doc
+    make
+    make install
+    cd ../..
+    rm --recursive --force --verbose plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
+
+    # set custom prompt indicating the container name
+    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
+    # write a small script that sets PS1; the EOF terminator has to start at the
+    # beginning of its line, and the body is kept unindented to match
+    cat >$CUSTOM_PROMPT_ENV <<EOF
+#!/bin/bash
+PS1="[ubuntu20.04/gpu:\u@\h] \W> "
+EOF
+    chmod 755 $CUSTOM_PROMPT_ENV
+
+
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    # (the apt package index files fetched by the apt-get update calls above)
+    rm -rf /var/lib/apt/lists/*
+
+%environment
+    # run with the plain "C" locale
+    export LC_ALL=C
+    # tell OpenMPI to not try using Infiniband
+    export OMPI_MCA_btl="^openib"
+    # do not warn about unused components as this messes up testing
+    export OMPI_MCA_btl_base_warn_component_unused="0"
+
+%labels
+    Author rbberger, akohlmey
--- a/tools/singularity/ubuntu20.04_intel_opencl.def
+++ b/tools/singularity/ubuntu20.04_intel_opencl.def
@ -0,0 +1,145 @@
+BootStrap: docker
+From: ubuntu:20.04
+
+%post
+    # keep apt/debconf from asking questions during the unattended build
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update
+    # software-properties-common provides add-apt-repository, used to add the OpenKIM PPA
+    # (presumably the source of libkim-api-dev / openkim-models below - confirm)
+    apt-get install --no-install-recommends -y software-properties-common
+    add-apt-repository ppa:openkim/latest
+    apt-get update
+    apt-get upgrade --no-install-recommends -y
+    # general development toolchain and libraries
+    apt-get install --no-install-recommends -y \
+        bc \
+        build-essential \
+        ccache \
+        clang \
+        cmake \
+        cmake-curses-gui \
+        curl \
+        doxygen \
+        enchant \
+        g++ \
+        gcc \
+        gfortran \
+        git \
+        hdf5-tools \
+        less \
+        libblas-dev \
+        libeigen3-dev \
+        libenchant-dev \
+        libfftw3-dev \
+        libgsl-dev \
+        libhdf5-serial-dev \
+        libhwloc-dev \
+        libjpeg-dev \
+        liblapack-dev \
+        libnetcdf-dev \
+        libomp-dev \
+        libopenblas-dev \
+        libnuma-dev \
+        libpng-dev \
+        libproj-dev \
+        libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
+        make \
+        mpi-default-bin \
+        mpi-default-dev \
+        ninja-build \
+        python3-dev \
+        python3-pip \
+        python3-pkg-resources \
+        python3-setuptools \
+        python3-virtualenv \
+        rsync \
+        ssh \
+        vim-nox \
+        virtualenv \
+        voro++-dev \
+        wget \
+        xxd \
+        valgrind \
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+    ###########################################################################
+    # Intel OpenCL
+    ###########################################################################
+
+    # add the intel-opencl PPA, refresh the package index and install the ICD package from it
+    add-apt-repository ppa:intel-opencl/intel-opencl
+    apt-get update
+    apt-get install -y intel-opencl-icd
+
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround for installing files in /usr/share/doc inside of a container
+    # 1) comment out the dpkg path-exclude rule for /usr/share/doc
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    # 2) install the documentation package while the rule is disabled
+    apt-get install -y libkim-api-doc
+    # 3) re-enable the rule
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # install KIM models
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    # these two example files are stored gzip-compressed; unpack them in place first
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    # install the example model drivers, portable models and simulator model into the "system" collection
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    # Download the PLUMED release tarball into a scratch directory, build it,
+    # install it under /usr and remove the scratch directory afterwards.
+    export PLUMED_PKG_VERSION=2.6.1
+    plumed_tarball=plumed.tar.gz
+    plumed_url=https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+
+    mkdir plumed
+    cd plumed
+    curl --location --output ${plumed_tarball} ${plumed_url}
+    tar --extract --gzip --file ${plumed_tarball}
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --prefix=/usr --disable-doc
+    make
+    make install
+    cd ../..
+    rm --recursive --force --verbose plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
+
+    # set custom prompt indicating the container name
+    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
+    # write a small script that sets PS1; the EOF terminator has to start at the
+    # beginning of its line, and the body is kept unindented to match
+    cat >$CUSTOM_PROMPT_ENV <<EOF
+#!/bin/bash
+PS1="[ubuntu20.04/intel:\u@\h] \W> "
+EOF
+    chmod 755 $CUSTOM_PROMPT_ENV
+
+
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    # (the apt package index files fetched by the apt-get update calls above)
+    rm -rf /var/lib/apt/lists/*
+
+
+%environment
+    # run with the plain "C" locale
+    export LC_ALL=C
+    # tell OpenMPI to not try using Infiniband
+    export OMPI_MCA_btl="^openib"
+    # do not warn about unused components as this messes up testing
+    export OMPI_MCA_btl_base_warn_component_unused="0"
+
+%labels
+    Author rbberger, akohlmey
--- a/tools/singularity/ubuntu20.04_nvidia.def
+++ b/tools/singularity/ubuntu20.04_nvidia.def
@ -0,0 +1,147 @@
+BootStrap: docker
+From: nvidia/cuda:11.0-devel-ubuntu20.04
+
+%post
+    # keep apt/debconf from asking questions during the unattended build
+    export DEBIAN_FRONTEND=noninteractive
+    apt-get update
+    # software-properties-common provides add-apt-repository, used to add the OpenKIM PPA
+    # (presumably the source of libkim-api-dev / openkim-models below - confirm)
+    apt-get install --no-install-recommends -y software-properties-common
+    add-apt-repository ppa:openkim/latest
+    apt-get update
+    apt-get upgrade --no-install-recommends -y
+    # general development toolchain and libraries, plus the OpenCL ICD loader packages and clinfo
+    # NOTE(review): unlike the other ubuntu20.04 definitions added alongside this one,
+    # this list has no libnetcdf-dev - confirm that the omission is intentional
+    apt-get install --no-install-recommends -y \
+        bc \
+        build-essential \
+        ccache \
+        clang \
+        clinfo \
+        cmake \
+        cmake-curses-gui \
+        curl \
+        doxygen \
+        enchant \
+        g++ \
+        gcc \
+        gfortran \
+        git \
+        hdf5-tools \
+        less \
+        libblas-dev \
+        libeigen3-dev \
+        libenchant-dev \
+        libfftw3-dev \
+        libgsl-dev \
+        libhdf5-serial-dev \
+        libhwloc-dev \
+        libjpeg-dev \
+        liblapack-dev \
+        libomp-dev \
+        libopenblas-dev \
+        libnuma-dev \
+        libpng-dev \
+        libproj-dev \
+        libvtk6-dev \
+        libyaml-dev \
+        libzstd-dev \
+        make \
+        mpi-default-bin \
+        mpi-default-dev \
+        ninja-build \
+        ocl-icd-libopencl1 \
+        ocl-icd-opencl-dev \
+        python3-dev \
+        python3-pip \
+        python3-pkg-resources \
+        python3-setuptools \
+        python3-virtualenv \
+        rsync \
+        ssh \
+        vim-nox \
+        virtualenv \
+        voro++-dev \
+        wget \
+        xxd \
+        valgrind \
+        gdb \
+        zstd \
+        libkim-api-dev \
+        openkim-models
+
+
+    ###########################################################################
+    # NVIDIA OpenCL
+    ###########################################################################
+
+    # create the OpenCL vendor directory and an nvidia.icd entry naming libnvidia-opencl.so.1
+    mkdir -p /etc/OpenCL/vendors
+    echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd
+
+
+    ###########################################################################
+    # KIM-API
+    ###########################################################################
+
+    # workaround for installing files in /usr/share/doc inside of a container
+    # 1) comment out the dpkg path-exclude rule for /usr/share/doc
+    sed -i 's/path-exclude=\/usr\/share\/doc/#path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+    # 2) install the documentation package while the rule is disabled
+    apt-get install -y libkim-api-doc
+    # 3) re-enable the rule
+    sed -i 's/#path-exclude=\/usr\/share\/doc/path-exclude=\/usr\/share\/doc/g' /etc/dpkg/dpkg.cfg.d/excludes
+
+    # install KIM models
+    KIM_API_EXAMPLES=/usr/share/doc/libkim-api-dev/examples
+    # these two example files are stored gzip-compressed; unpack them in place first
+    gunzip $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003/LennardJones612_UniversalShifted.params.gz
+    gunzip $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ/ex_model_driver_P_LJ.f90.gz
+
+    # install the example model drivers, portable models and simulator model into the "system" collection
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/LennardJones612__MD_414112407348_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/model-drivers/ex_model_driver_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones_Ar
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/ex_model_Ar_P_LJ
+    kim-api-collections-management install system $KIM_API_EXAMPLES/portable-models/LennardJones612_UniversalShifted__MO_959249795837_003
+    kim-api-collections-management install system $KIM_API_EXAMPLES/simulator-models/Sim_LAMMPS_LJcut_AkersonElliott_Alchemy_PbAu
+
+
+    ###########################################################################
+    # Plumed
+    ###########################################################################
+
+    # Download the PLUMED release tarball into a scratch directory, build it,
+    # install it under /usr and remove the scratch directory afterwards.
+    export PLUMED_PKG_VERSION=2.6.1
+    plumed_tarball=plumed.tar.gz
+    plumed_url=https://github.com/plumed/plumed2/releases/download/v${PLUMED_PKG_VERSION}/plumed-src-${PLUMED_PKG_VERSION}.tgz
+
+    mkdir plumed
+    cd plumed
+    curl --location --output ${plumed_tarball} ${plumed_url}
+    tar --extract --gzip --file ${plumed_tarball}
+    cd plumed-${PLUMED_PKG_VERSION}
+    ./configure --prefix=/usr --disable-doc
+    make
+    make install
+    cd ../..
+    rm --recursive --force --verbose plumed
+
+
+    ###########################################################################
+    # Customizations
+    ###########################################################################
+
+    # set custom prompt indicating the container name
+    CUSTOM_PROMPT_ENV=/.singularity.d/env/99-zz_custom_prompt.sh
+    # write a small script that sets PS1; the EOF terminator has to start at the
+    # beginning of its line, and the body is kept unindented to match
+    cat >$CUSTOM_PROMPT_ENV <<EOF
+#!/bin/bash
+PS1="[ubuntu20.04/nvidia:\u@\h] \W> "
+EOF
+    chmod 755 $CUSTOM_PROMPT_ENV
+
+
+    ###########################################################################
+    # Cleanup
+    ###########################################################################
+    # clean cache
+    # (the apt package index files fetched by the apt-get update calls above)
+    rm -rf /var/lib/apt/lists/*
+
+%environment
+    # run with the plain "C" locale
+    export LC_ALL=C
+    # tell OpenMPI to not try using Infiniband
+    export OMPI_MCA_btl="^openib"
+    # do not warn about unused components as this messes up testing
+    export OMPI_MCA_btl_base_warn_component_unused="0"
+
+%labels
+    Author rbberger, akohlmey
--- a/unittest/c-library/test_library_open.cpp
+++ b/unittest/c-library/test_library_open.cpp
@ -164,7 +164,7 @@ TEST(lammps_open_fortran, no_args) {
    MPI_Comm_split(MPI_COMM_WORLD, 0, 1, &mycomm);
    int fcomm = MPI_Comm_c2f(mycomm);
    ::testing::internal::CaptureStdout();
-    void *handle = lammps_open_fortran(0, NULL, fcomm, NULL);
+    void *handle = lammps_open_fortran(0, NULL, fcomm);
    std::string output = ::testing::internal::GetCapturedStdout();
    EXPECT_STREQ(output.substr(0,6).c_str(),"LAMMPS");
    LAMMPS_NS::LAMMPS *lmp = (LAMMPS_NS::LAMMPS *)handle;
--- a/unittest/utils/CMakeLists.txt
+++ b/unittest/utils/CMakeLists.txt
@ -2,6 +2,10 @@ add_executable(test_tokenizer test_tokenizer.cpp)
 target_link_libraries(test_tokenizer PRIVATE lammps GTest::GMockMain GTest::GMock GTest::GTest)
 add_test(Tokenizer test_tokenizer)

+add_executable(test_mempool test_mempool.cpp)  # unit tests for the MyPage and MyPoolChunk templates
+target_link_libraries(test_mempool PRIVATE lammps GTest::GMockMain GTest::GMock GTest::GTest)
+add_test(NAME MemPool COMMAND test_mempool)  # NAME/COMMAND form resolves the target to its built location
+
 add_executable(test_utils test_utils.cpp)
 target_link_libraries(test_utils PRIVATE lammps GTest::GMockMain GTest::GMock GTest::GTest)
 add_test(Utils test_utils)
--- a/unittest/utils/test_mempool.cpp
+++ b/unittest/utils/test_mempool.cpp
@ -0,0 +1,347 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "lmptype.h"
+#include "my_page.h"
+#include "my_pool_chunk.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using namespace LAMMPS_NS;
+
+TEST(MyPage, int) {
+    // Exercises MyPage<int>: default and custom init(), the vget()/vgot() and
+    // get() access paths, status(), reset() and size().
+    MyPage<int> p;
+
+    // default init. maxchunk=1, pagesize=1024
+    int rv = p.init();
+    ASSERT_EQ(rv,0);
+
+    // nothing has been handed out yet
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    int *iptr = p.vget();
+    // second call to vget() should give same pointer without vgot()
+    ASSERT_EQ(iptr,p.vget());
+    // claim one element; the next vget() is expected one element further
+    p.vgot(1);
+    ++iptr;
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,1);
+    ASSERT_EQ(p.nchunk,1);
+    ASSERT_EQ(iptr,p.vget());
+    // use too large chunk size (2 > maxchunk=1); status() is expected to report 1
+    p.vgot(2);
+    ASSERT_EQ(1,p.status());
+
+    // reset() is expected to clear the counters
+    p.reset();
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    // mix vget()/vgot() with get(): each call is expected to return the next element
+    iptr = p.vget();
+    p.vgot(1);
+    ++iptr;
+    ASSERT_EQ(iptr,p.get());
+    ++iptr;
+    ASSERT_EQ(iptr,p.get(1));
+    ASSERT_EQ(p.ndatum,3);
+    ASSERT_EQ(p.nchunk,3);
+
+    // restart with custom init. maxchunk=16, pagesize=64, pagedelta=2
+    // (the size() checks below expect 2 x 64 and later 4 x 64 elements)
+    rv = p.init(16,64,2);
+    ASSERT_EQ(rv,0);
+
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    iptr = p.vget();
+    // second call to vget() should give same pointer without vgot()
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(16);
+    iptr += 16;
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,16);
+    ASSERT_EQ(p.nchunk,1);
+
+    // use too large chunk size (32 > maxchunk=16)
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(32);
+    ASSERT_EQ(1,p.status());
+
+    // reset() is expected to clear the error status as well as the counters
+    p.reset();
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    // chunks of 16, 4, 1 and 16 elements: 37 elements in 4 chunks
+    iptr = p.vget();
+    p.vgot(16);
+    iptr = p.vget();
+    p.vgot(4);
+    iptr += 4;
+    ASSERT_EQ(iptr,p.get());
+    ++iptr;
+    ASSERT_EQ(iptr,p.get(16));
+    ASSERT_DOUBLE_EQ(p.size(),(double)sizeof(int)*128.0);
+    ASSERT_EQ(p.ndatum,37);
+    ASSERT_EQ(p.nchunk,4);
+    p.get(16);
+    p.get(16);
+    // allocation on the same page
+    iptr = p.get(16);
+    iptr += 16;
+    ASSERT_EQ(iptr,p.get(16));
+    // allocation on different pages
+    // NOTE(review): iptr is advanced by 16 only once although two get(16) calls
+    // follow the last matching assertion, so this inequality looks like it would
+    // hold even without a page switch; the size() check below is what shows that
+    // new pages were added - confirm against MyPage::get()
+    p.get(16);
+    iptr += 16;
+    ASSERT_NE(iptr,p.get(16));
+    ASSERT_DOUBLE_EQ(p.size(),(double)sizeof(int)*256.0);
+    ASSERT_EQ(p.ndatum,133);
+    ASSERT_EQ(p.nchunk,10);
+}
+
+TEST(MyPage, double) {
+    // Exercises MyPage<double>: default and custom init(), the vget()/vgot() and
+    // get() access paths, status(), reset() and size().
+    MyPage<double> p;
+
+    // default init. maxchunk=1, pagesize=1024
+    int rv = p.init();
+    ASSERT_EQ(rv,0);
+
+    // nothing has been handed out yet
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    double *iptr = p.vget();
+    // second call to vget() should give same pointer without vgot()
+    ASSERT_EQ(iptr,p.vget());
+    // claim one element; the next vget() is expected one element further
+    p.vgot(1);
+    ++iptr;
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,1);
+    ASSERT_EQ(p.nchunk,1);
+    ASSERT_EQ(iptr,p.vget());
+    // use too large chunk size (2 > maxchunk=1); status() is expected to report 1
+    p.vgot(2);
+    ASSERT_EQ(1,p.status());
+
+    // reset() is expected to clear the counters
+    p.reset();
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    // mix vget()/vgot() with get(): each call is expected to return the next element
+    iptr = p.vget();
+    p.vgot(1);
+    ++iptr;
+    ASSERT_EQ(iptr,p.get());
+    ++iptr;
+    ASSERT_EQ(iptr,p.get(1));
+    ASSERT_EQ(p.ndatum,3);
+    ASSERT_EQ(p.nchunk,3);
+
+    // restart with custom init. maxchunk=16, pagesize=64, pagedelta=2
+    // (the size() checks below expect 2 x 64 and later 4 x 64 elements)
+    rv = p.init(16,64,2);
+    ASSERT_EQ(rv,0);
+
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    iptr = p.vget();
+    // second call to vget() should give same pointer without vgot()
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(16);
+    iptr += 16;
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,16);
+    ASSERT_EQ(p.nchunk,1);
+
+    // use too large chunk size (32 > maxchunk=16)
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(32);
+    ASSERT_EQ(1,p.status());
+
+    // reset() is expected to clear the error status as well as the counters
+    p.reset();
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    // chunks of 16, 4, 1 and 16 elements: 37 elements in 4 chunks
+    iptr = p.vget();
+    p.vgot(16);
+    iptr = p.vget();
+    p.vgot(4);
+    iptr += 4;
+    ASSERT_EQ(iptr,p.get());
+    ++iptr;
+    ASSERT_EQ(iptr,p.get(16));
+    ASSERT_DOUBLE_EQ(p.size(),(double)sizeof(double)*128.0);
+    ASSERT_EQ(p.ndatum,37);
+    ASSERT_EQ(p.nchunk,4);
+    p.get(16);
+    p.get(16);
+    // allocation on the same page
+    iptr = p.get(16);
+    iptr += 16;
+    ASSERT_EQ(iptr,p.get(16));
+    // allocation on different pages
+    // NOTE(review): iptr is advanced by 16 only once although two get(16) calls
+    // follow the last matching assertion, so this inequality looks like it would
+    // hold even without a page switch; the size() check below is what shows that
+    // new pages were added - confirm against MyPage::get()
+    p.get(16);
+    iptr += 16;
+    ASSERT_NE(iptr,p.get(16));
+    ASSERT_DOUBLE_EQ(p.size(),(double)sizeof(double)*256.0);
+    ASSERT_EQ(p.ndatum,133);
+    ASSERT_EQ(p.nchunk,10);
+}
+
+TEST(MyPage, bigint) {
+    // Exercises MyPage<bigint>: default and custom init(), the vget()/vgot() and
+    // get() access paths, status(), reset() and size().
+    MyPage<bigint> p;
+
+    // default init. maxchunk=1, pagesize=1024
+    int rv = p.init();
+    ASSERT_EQ(rv,0);
+
+    // nothing has been handed out yet
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    bigint *iptr = p.vget();
+    // second call to vget() should give same pointer without vgot()
+    ASSERT_EQ(iptr,p.vget());
+    // claim one element; the next vget() is expected one element further
+    p.vgot(1);
+    ++iptr;
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,1);
+    ASSERT_EQ(p.nchunk,1);
+    ASSERT_EQ(iptr,p.vget());
+    // use too large chunk size (2 > maxchunk=1); status() is expected to report 1
+    p.vgot(2);
+    ASSERT_EQ(1,p.status());
+
+    // reset() is expected to clear the counters
+    p.reset();
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    // mix vget()/vgot() with get(): each call is expected to return the next element
+    iptr = p.vget();
+    p.vgot(1);
+    ++iptr;
+    ASSERT_EQ(iptr,p.get());
+    ++iptr;
+    ASSERT_EQ(iptr,p.get(1));
+    ASSERT_EQ(p.ndatum,3);
+    ASSERT_EQ(p.nchunk,3);
+
+    // restart with custom init. maxchunk=16, pagesize=64, pagedelta=2
+    // (the size() checks below expect 2 x 64 and later 4 x 64 elements)
+    rv = p.init(16,64,2);
+    ASSERT_EQ(rv,0);
+
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    iptr = p.vget();
+    // second call to vget() should give same pointer without vgot()
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(16);
+    iptr += 16;
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,16);
+    ASSERT_EQ(p.nchunk,1);
+
+    // use too large chunk size (32 > maxchunk=16)
+    ASSERT_EQ(iptr,p.vget());
+    p.vgot(32);
+    ASSERT_EQ(1,p.status());
+
+    // reset() is expected to clear the error status as well as the counters
+    p.reset();
+    ASSERT_EQ(0,p.status());
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+
+    // chunks of 16, 4, 1 and 16 elements: 37 elements in 4 chunks
+    iptr = p.vget();
+    p.vgot(16);
+    iptr = p.vget();
+    p.vgot(4);
+    iptr += 4;
+    ASSERT_EQ(iptr,p.get());
+    ++iptr;
+    ASSERT_EQ(iptr,p.get(16));
+    ASSERT_DOUBLE_EQ(p.size(),(double)sizeof(bigint)*128.0);
+    ASSERT_EQ(p.ndatum,37);
+    ASSERT_EQ(p.nchunk,4);
+    p.get(16);
+    p.get(16);
+    // allocation on the same page
+    iptr = p.get(16);
+    iptr += 16;
+    ASSERT_EQ(iptr,p.get(16));
+    // allocation on different pages
+    // NOTE(review): iptr is advanced by 16 only once although two get(16) calls
+    // follow the last matching assertion, so this inequality looks like it would
+    // hold even without a page switch; the size() check below is what shows that
+    // new pages were added - confirm against MyPage::get()
+    p.get(16);
+    iptr += 16;
+    ASSERT_NE(iptr,p.get(16));
+    ASSERT_DOUBLE_EQ(p.size(),(double)sizeof(bigint)*256.0);
+    ASSERT_EQ(p.ndatum,133);
+    ASSERT_EQ(p.nchunk,10);
+}
+
+TEST(MyPoolChunk, int) {
+    // default construction: minchunk=1, maxchunk=1, nbin=1, chunksperpage=1024, pagedelta=1
+    MyPoolChunk<int> pool;
+
+    // a freshly constructed pool is expected to be empty
+    ASSERT_EQ(pool.ndatum,0);
+    ASSERT_EQ(pool.nchunk,0);
+    ASSERT_EQ(pool.size(),0.0);
+
+    // start from all bits set so the index written by get() is distinguishable
+    int slot=~0x0000;
+    int *chunk = pool.get(slot);
+    ASSERT_NE(chunk,nullptr);
+    ASSERT_EQ(slot,0);
+
+    // the explicit-size request is expected to receive the next index
+    chunk = pool.get(1,slot);
+    ASSERT_NE(chunk,nullptr);
+    ASSERT_EQ(slot,1);
+    // a single page accounts for all memory in use at this point
+    ASSERT_EQ(pool.size(),1024*sizeof(int)+1024*sizeof(int)+sizeof(void *)+sizeof(int));
+    ASSERT_EQ(pool.ndatum,2);
+    ASSERT_EQ(pool.nchunk,2);
+
+    // handing chunk 0 back leaves one chunk in use
+    pool.put(0);
+    ASSERT_EQ(pool.ndatum,1);
+    ASSERT_EQ(pool.nchunk,1);
+
+    // a request above maxchunk is expected to fail with status 3 and unchanged counters
+    chunk = pool.get(2,slot);
+    ASSERT_EQ(chunk,nullptr);
+    ASSERT_EQ(pool.status(),3);
+    ASSERT_EQ(pool.ndatum,1);
+    ASSERT_EQ(pool.nchunk,1);
+}
+
+TEST(MyPoolChunk, double) {
+    // Same scenario as TEST(MyPoolChunk, int), instantiated for double.
+    // defaults to minchunk=1, maxchunk=1, nbin=1, chunksperpage=1024, pagedelta=1
+    MyPoolChunk<double> p;
+
+    // a freshly constructed pool is expected to be empty
+    ASSERT_EQ(p.ndatum,0);
+    ASSERT_EQ(p.nchunk,0);
+    ASSERT_EQ(p.size(),0.0);
+
+    // start from all bits set so the index written by get() is distinguishable
+    int idx=~0x0000;
+    double *dptr = p.get(idx);
+    ASSERT_NE(dptr,nullptr);
+    ASSERT_EQ(idx,0);
+
+    dptr = p.get(1,idx);
+    ASSERT_NE(dptr,nullptr);
+    ASSERT_EQ(idx,1);
+    // we have only one page allocated
+    ASSERT_EQ(p.size(),1024*sizeof(int)+1024*sizeof(double)+sizeof(void *)+sizeof(int));
+    // both chunks must be counted, matching the int variant of this test
+    ASSERT_EQ(p.ndatum,2);
+    ASSERT_EQ(p.nchunk,2);
+
+    // handing chunk 0 back leaves one chunk in use
+    p.put(0);
+    ASSERT_EQ(p.ndatum,1);
+    ASSERT_EQ(p.nchunk,1);
+
+    // a request above maxchunk is expected to fail with status 3 and unchanged counters
+    dptr = p.get(2,idx);
+    ASSERT_EQ(dptr,nullptr);
+    ASSERT_EQ(p.status(),3);
+    ASSERT_EQ(p.ndatum,1);
+    ASSERT_EQ(p.nchunk,1);
+}
+