From d17e06c479e961879a1c3b0a3ba5cefa2e5c1121 Mon Sep 17 00:00:00 2001
From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa>
Date: Tue, 13 May 2014 14:04:47 +0000
Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@11976
 f3b2605a-c512-4ea7-a41b-209d697bcdaa

---
 doc/Manual.html.html                       | 266 -------------------
 doc/Section_commands.html                  |  31 +--
 doc/Section_commands.txt                   |   5 +-
 doc/balance.html                           | 282 +++++++++++++--------
 doc/balance.txt                            | 279 ++++++++++++--------
 doc/{communicate.html => comm_modify.html} |  57 +++--
 doc/{communicate.txt => comm_modify.txt}   |  54 ++--
 doc/comm_style.html                        |  70 +++++
 doc/comm_style.txt                         |  65 +++++
 doc/create_box.html                        |  22 +-
 doc/create_box.txt                         |  22 +-
 doc/fix_balance.html                       | 243 +++++++++++-------
 doc/fix_balance.txt                        | 241 +++++++++++-------
 doc/processors.html                        |  51 ++--
 doc/processors.txt                         |  51 ++--
 doc/read_data.html                         |   7 +-
 doc/read_data.txt                          |   7 +-
 doc/read_restart.html                      |  10 +-
 doc/read_restart.txt                       |  10 +-
 doc/replicate.html                         |   7 +-
 doc/replicate.txt                          |   7 +-
 21 files changed, 1009 insertions(+), 778 deletions(-)
 delete mode 100644 doc/Manual.html.html
 rename doc/{communicate.html => comm_modify.html} (73%)
 rename doc/{communicate.txt => comm_modify.txt} (73%)
 create mode 100644 doc/comm_style.html
 create mode 100644 doc/comm_style.txt
diff --git a/doc/Manual.html.html b/doc/Manual.html.html
deleted file mode 100644
index 72aee7a66c..0000000000
--- a/doc/Manual.html.html
+++ /dev/null
@@ -1,266 +0,0 @@
-<HTML>
-<HTML>
-<HEAD>
-<TITLE>LAMMPS Users Manual</TITLE>
-<META NAME="docnumber" CONTENT="10 May 2014 version">
-<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
-<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation.  This software and manual is distributed under the GNU General Public License.">
-</HEAD>
-
-<BODY>
-
-<CENTER><A HREF = "http://lammps.sandia.gov">LAMMPS WWW Site</A> - <A HREF = "Manual.html">LAMMPS Documentation</A> - <A HREF = "Section_commands.html#comm">LAMMPS Commands</A> 
-</CENTER>
-
-<HR>
-
-<H1></H1>
-
-<P><CENTER><H3>LAMMPS Documentation 
-</H3></CENTER>
-<CENTER><H4>10 May 2014 version 
-</H4></CENTER>
-<H4>Version info: 
-</H4>
-<P>The LAMMPS "version" is the date when it was released, such as 1 May
-2010. LAMMPS is updated continuously.  Whenever we fix a bug or add a
-feature, we release it immediately, and post a notice on <A HREF = "http://lammps.sandia.gov/bug.html">this page of
-the WWW site</A>.  Each dated copy of LAMMPS contains all the
-features and bug-fixes up to and including that version date. The
-version date is printed to the screen and logfile every time you run
-LAMMPS. It is also in the file src/version.h and in the LAMMPS
-directory name created when you unpack a tarball, and at the top of
-the first page of the manual (this page).
-</P>
-<UL><LI>If you browse the HTML doc pages on the LAMMPS WWW site, they always
-describe the most current version of LAMMPS. 
-</P>
-<P><LI>If you browse the HTML doc pages included in your tarball, they
-describe the version you have. 
-</P>
-<P><LI>The <A HREF = "Manual.pdf">PDF file</A> on the WWW site or in the tarball is updated
-about once per month.  This is because it is large, and we don't want
-it to be part of every patch. 
-</P>
-<LI>There is also a <A HREF = "Developer.pdf">Developer.pdf</A> file in the doc
-directory, which describes the internal structure and algorithms of
-LAMMPS.  
-</UL>
-<P>LAMMPS stands for Large-scale Atomic/Molecular Massively Parallel
-Simulator.
-</P>
-<P>LAMMPS is a classical molecular dynamics simulation code designed to
-run efficiently on parallel computers.  It was developed at Sandia
-National Laboratories, a US Department of Energy facility, with
-funding from the DOE.  It is an open-source code, distributed freely
-under the terms of the GNU Public License (GPL).
-</P>
-<P>The primary developers of LAMMPS are <A HREF = "http://www.sandia.gov/~sjplimp">Steve Plimpton</A>, Aidan
-Thompson, and Paul Crozier who can be contacted at
-sjplimp,athomps,pscrozi at sandia.gov.  The <A HREF = "http://lammps.sandia.gov">LAMMPS WWW Site</A> at
-http://lammps.sandia.gov has more information about the code and its
-uses.
-</P>
-
-<HR>
-
-<P>The LAMMPS documentation is organized into the following sections.  If
-you find errors or omissions in this manual or have suggestions for
-useful information to add, please send an email to the developers so
-we can improve the LAMMPS documentation.
-</P>
-<P>Once you are familiar with LAMMPS, you may want to bookmark <A HREF = "Section_commands.html#comm">this
-page</A> at Section_commands.html#comm since
-it gives quick access to documentation for all LAMMPS commands.
-</P>
-<P><A HREF = "Manual.pdf">PDF file</A> of the entire manual, generated by
-<A HREF = "http://www.easysw.com/htmldoc">htmldoc</A>
-</P>
-<OL><LI><A HREF = "Section_intro.html">Introduction</A> 
-
-<UL>  1.1 <A HREF = "Section_intro.html#intro_1">What is LAMMPS</A> 
-<BR>
-  1.2 <A HREF = "Section_intro.html#intro_2">LAMMPS features</A> 
-<BR>
-  1.3 <A HREF = "Section_intro.html#intro_3">LAMMPS non-features</A> 
-<BR>
-  1.4 <A HREF = "Section_intro.html#intro_4">Open source distribution</A> 
-<BR>
-  1.5 <A HREF = "Section_intro.html#intro_5">Acknowledgments and citations</A> 
-<BR></UL>
-<LI><A HREF = "Section_start.html">Getting started</A> 
-
-<UL>  2.1 <A HREF = "Section_start.html#start_1">What's in the LAMMPS distribution</A> 
-<BR>
-  2.2 <A HREF = "Section_start.html#start_2">Making LAMMPS</A> 
-<BR>
-  2.3 <A HREF = "Section_start.html#start_3">Making LAMMPS with optional packages</A> 
-<BR>
-  2.4 <A HREF = "Section_start.html#start_4">Building LAMMPS via the Make.py script</A> 
-<BR>
-  2.5 <A HREF = "Section_start.html#start_5">Building LAMMPS as a library</A> 
-<BR>
-  2.6 <A HREF = "Section_start.html#start_6">Running LAMMPS</A> 
-<BR>
-  2.7 <A HREF = "Section_start.html#start_7">Command-line options</A> 
-<BR>
-  2.8 <A HREF = "Section_start.html#start_8">Screen output</A> 
-<BR>
-  2.9 <A HREF = "Section_start.html#start_9">Tips for users of previous versions</A> 
-<BR></UL>
-<LI><A HREF = "Section_commands.html">Commands</A> 
-
-<UL>  3.1 <A HREF = "Section_commands.html#cmd_1">LAMMPS input script</A> 
-<BR>
-  3.2 <A HREF = "Section_commands.html#cmd_2">Parsing rules</A> 
-<BR>
-  3.3 <A HREF = "Section_commands.html#cmd_3">Input script structure</A> 
-<BR>
-  3.4 <A HREF = "Section_commands.html#cmd_4">Commands listed by category</A> 
-<BR>
-  3.5 <A HREF = "Section_commands.html#cmd_5">Commands listed alphabetically</A> 
-<BR></UL>
-<LI><A HREF = "Section_packages.html">Packages</A> 
-
-<UL>  4.1 <A HREF = "Section_packages.html#pkg_1">Standard packages</A> 
-<BR>
-  4.2 <A HREF = "Section_packages.html#pkg_2">User packages</A> 
-<BR></UL>
-<LI><A HREF = "Section_accelerate.html">Accelerating LAMMPS performance</A> 
-
-<UL>  5.1 <A HREF = "Section_accelerate.html#acc_1">Measuring performance</A> 
-<BR>
-  5.2 <A HREF = "Section_accelerate.html#acc_2">General strategies</A> 
-<BR>
-  5.3 <A HREF = "Section_accelerate.html#acc_3">Packages with optimized styles</A> 
-<BR>
-  5.4 <A HREF = "Section_accelerate.html#acc_4">OPT package</A> 
-<BR>
-  5.5 <A HREF = "Section_accelerate.html#acc_5">USER-OMP package</A> 
-<BR>
-  5.6 <A HREF = "Section_accelerate.html#acc_6">GPU package</A> 
-<BR>
-  5.7 <A HREF = "Section_accelerate.html#acc_7">USER-CUDA package</A> 
-<BR>
-  5.8 <A HREF = "Section_accelerate.html#acc_8">Comparison of GPU and USER-CUDA packages</A> 
-<BR></UL>
-<LI><A HREF = "Section_howto.html">How-to discussions</A> 
-
-<UL>  6.1 <A HREF = "Section_howto.html#howto_1">Restarting a simulation</A> 
-<BR>
-  6.2 <A HREF = "Section_howto.html#howto_2">2d simulations</A> 
-<BR>
-  6.3 <A HREF = "Section_howto.html#howto_3">CHARMM and AMBER force fields</A> 
-<BR>
-  6.4 <A HREF = "Section_howto.html#howto_4">Running multiple simulations from one input script</A> 
-<BR>
-  6.5 <A HREF = "Section_howto.html#howto_5">Multi-replica simulations</A> 
-<BR>
-  6.6 <A HREF = "Section_howto.html#howto_6">Granular models</A> 
-<BR>
-  6.7 <A HREF = "Section_howto.html#howto_7">TIP3P water model</A> 
-<BR>
-  6.8 <A HREF = "Section_howto.html#howto_8">TIP4P water model</A> 
-<BR>
-  6.9 <A HREF = "Section_howto.html#howto_9">SPC water model</A> 
-<BR>
-  6.10 <A HREF = "Section_howto.html#howto_10">Coupling LAMMPS to other codes</A> 
-<BR>
-  6.11 <A HREF = "Section_howto.html#howto_11">Visualizing LAMMPS snapshots</A> 
-<BR>
-  6.12 <A HREF = "Section_howto.html#howto_12">Triclinic (non-orthogonal) simulation boxes</A> 
-<BR>
-  6.13 <A HREF = "Section_howto.html#howto_13">NEMD simulations</A> 
-<BR>
-  6.14 <A HREF = "Section_howto.html#howto_14">Finite-size spherical and aspherical particles</A> 
-<BR>
-  6.15 <A HREF = "Section_howto.html#howto_15">Output from LAMMPS (thermo, dumps, computes, fixes, variables)</A> 
-<BR>
-  6.16 <A HREF = "Section_howto.html#howto_16">Thermostatting, barostatting, and compute temperature</A> 
-<BR>
-  6.17 <A HREF = "Section_howto.html#howto_17">Walls</A> 
-<BR>
-  6.18 <A HREF = "Section_howto.html#howto_18">Elastic constants</A> 
-<BR>
-  6.19 <A HREF = "Section_howto.html#howto_19">Library interface to LAMMPS</A> 
-<BR>
-  6.20 <A HREF = "Section_howto.html#howto_20">Calculating thermal conductivity</A> 
-<BR>
-  6.21 <A HREF = "Section_howto.html#howto_21">Calculating viscosity</A> 
-<BR>
-  6.22 <A HREF = "howto_22">Calculating a diffusion coefficient</A> 
-<BR></UL>
-<LI><A HREF = "Section_example.html">Example problems</A> 
-
-<LI><A HREF = "Section_perf.html">Performance & scalability</A> 
-
-<LI><A HREF = "Section_tools.html">Additional tools</A> 
-
-<LI><A HREF = "Section_modify.html">Modifying & extending LAMMPS</A> 
-
-<UL>  10.1 <A HREF = "Section_modify.html#mod_1">Atom styles</A> 
-<BR>
-  10.2 <A HREF = "Section_modify.html#mod_2">Bond, angle, dihedral, improper potentials</A> 
-<BR>
-  10.3 <A HREF = "Section_modify.html#mod_3">Compute styles</A> 
-<BR>
-  10.4 <A HREF = "Section_modify.html#mod_4">Dump styles</A> 
-<BR>
-  10.5 <A HREF = "Section_modify.html#mod_5">Dump custom output options</A> 
-<BR>
-  10.6 <A HREF = "Section_modify.html#mod_6">Fix styles</A> 
-<BR>
-  10.7 <A HREF = "Section_modify.html#mod_7">Input script commands</A> 
-<BR>
-  10.8 <A HREF = "Section_modify.html#mod_8">Kspace computations</A> 
-<BR>
-  10.9 <A HREF = "Section_modify.html#mod_9">Minimization styles</A> 
-<BR>
-  10.10 <A HREF = "Section_modify.html#mod_10">Pairwise potentials</A> 
-<BR>
-  10.11 <A HREF = "Section_modify.html#mod_11">Region styles</A> 
-<BR>
-  10.12 <A HREF = "Section_modify.html#mod_12">Body styles</A> 
-<BR>
-  10.13 <A HREF = "Section_modify.html#mod_13">Thermodynamic output options</A> 
-<BR>
-  10.14 <A HREF = "Section_modify.html#mod_14">Variable options</A> 
-<BR>
-  10.15 <A HREF = "Section_modify.html#mod_15">Submitting new features for inclusion in LAMMPS</A> 
-<BR></UL>
-<LI><A HREF = "Section_python.html">Python interface</A> 
-
-<UL>  11.1 <A HREF = "Section_python.html#py_1">Building LAMMPS as a shared library</A> 
-<BR>
-  11.2 <A HREF = "Section_python.html#py_2">Installing the Python wrapper into Python</A> 
-<BR>
-  11.3 <A HREF = "Section_python.html#py_3">Extending Python with MPI to run in parallel</A> 
-<BR>
-  11.4 <A HREF = "Section_python.html#py_4">Testing the Python-LAMMPS interface</A> 
-<BR>
-  11.5 <A HREF = "Section_python.html#py_5">Using LAMMPS from Python</A> 
-<BR>
-  11.6 <A HREF = "Section_python.html#py_6">Example Python scripts that use LAMMPS</A> 
-<BR></UL>
-<LI><A HREF = "Section_errors.html">Errors</A> 
-
-<UL>  12.1 <A HREF = "Section_errors.html#err_1">Common problems</A> 
-<BR>
-  12.2 <A HREF = "Section_errors.html#err_2">Reporting bugs</A> 
-<BR>
-  12.3 <A HREF = "Section_errors.html#err_3">Error & warning messages</A> 
-<BR></UL>
-<LI><A HREF = "Section_history.html">Future and history</A> 
-
-<UL>  13.1 <A HREF = "Section_history.html#hist_1">Coming attractions</A> 
-<BR>
-  13.2 <A HREF = "Section_history.html#hist_2">Past versions</A> 
-<BR></UL>
-
-</OL>
-
-</BODY>
-
-</HTML>
-
-</HTML>
diff --git a/doc/Section_commands.html b/doc/Section_commands.html
index 0888116918..229de6b9da 100644
--- a/doc/Section_commands.html
+++ b/doc/Section_commands.html
@@ -309,7 +309,7 @@ in the command's documentation.
 </P>
 <P>Settings:
 </P>
-<P><A HREF = "communicate.html">communicate</A>, <A HREF = "group.html">group</A>, <A HREF = "mass.html">mass</A>,
+<P><A HREF = "comm_style.html">comm_style</A>, <A HREF = "group.html">group</A>, <A HREF = "mass.html">mass</A>,
 <A HREF = "min_modify.html">min_modify</A>, <A HREF = "min_style.html">min_style</A>,
 <A HREF = "neigh_modify.html">neigh_modify</A>, <A HREF = "neighbor.html">neighbor</A>,
 <A HREF = "reset_timestep.html">reset_timestep</A>, <A HREF = "run_style.html">run_style</A>,
@@ -362,20 +362,21 @@ in the command's documentation.
 </P>
 <DIV ALIGN=center><TABLE  BORDER=1 >
 <TR ALIGN="center"><TD ><A HREF = "angle_coeff.html">angle_coeff</A></TD><TD ><A HREF = "angle_style.html">angle_style</A></TD><TD ><A HREF = "atom_modify.html">atom_modify</A></TD><TD ><A HREF = "atom_style.html">atom_style</A></TD><TD ><A HREF = "balance.html">balance</A></TD><TD ><A HREF = "bond_coeff.html">bond_coeff</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "bond_style.html">bond_style</A></TD><TD ><A HREF = "boundary.html">boundary</A></TD><TD ><A HREF = "box.html">box</A></TD><TD ><A HREF = "change_box.html">change_box</A></TD><TD ><A HREF = "clear.html">clear</A></TD><TD ><A HREF = "communicate.html">communicate</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "compute.html">compute</A></TD><TD ><A HREF = "compute_modify.html">compute_modify</A></TD><TD ><A HREF = "create_atoms.html">create_atoms</A></TD><TD ><A HREF = "create_box.html">create_box</A></TD><TD ><A HREF = "delete_atoms.html">delete_atoms</A></TD><TD ><A HREF = "delete_bonds.html">delete_bonds</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "dielectric.html">dielectric</A></TD><TD ><A HREF = "dihedral_coeff.html">dihedral_coeff</A></TD><TD ><A HREF = "dihedral_style.html">dihedral_style</A></TD><TD ><A HREF = "dimension.html">dimension</A></TD><TD ><A HREF = "displace_atoms.html">displace_atoms</A></TD><TD ><A HREF = "dump.html">dump</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "dump_image.html">dump image</A></TD><TD ><A HREF = "dump_modify.html">dump_modify</A></TD><TD ><A HREF = "dump_image.html">dump movie</A></TD><TD ><A HREF = "echo.html">echo</A></TD><TD ><A HREF = "fix.html">fix</A></TD><TD ><A HREF = "fix_modify.html">fix_modify</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "group.html">group</A></TD><TD ><A HREF = "if.html">if</A></TD><TD ><A HREF = "improper_coeff.html">improper_coeff</A></TD><TD ><A HREF = "improper_style.html">improper_style</A></TD><TD ><A HREF = "include.html">include</A></TD><TD ><A HREF = "jump.html">jump</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "kspace_modify.html">kspace_modify</A></TD><TD ><A HREF = "kspace_style.html">kspace_style</A></TD><TD ><A HREF = "label.html">label</A></TD><TD ><A HREF = "lattice.html">lattice</A></TD><TD ><A HREF = "log.html">log</A></TD><TD ><A HREF = "mass.html">mass</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "minimize.html">minimize</A></TD><TD ><A HREF = "min_modify.html">min_modify</A></TD><TD ><A HREF = "min_style.html">min_style</A></TD><TD ><A HREF = "molecule.html">molecule</A></TD><TD ><A HREF = "neb.html">neb</A></TD><TD ><A HREF = "neigh_modify.html">neigh_modify</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "neighbor.html">neighbor</A></TD><TD ><A HREF = "newton.html">newton</A></TD><TD ><A HREF = "next.html">next</A></TD><TD ><A HREF = "package.html">package</A></TD><TD ><A HREF = "pair_coeff.html">pair_coeff</A></TD><TD ><A HREF = "pair_modify.html">pair_modify</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "pair_style.html">pair_style</A></TD><TD ><A HREF = "pair_write.html">pair_write</A></TD><TD ><A HREF = "partition.html">partition</A></TD><TD ><A HREF = "prd.html">prd</A></TD><TD ><A HREF = "print.html">print</A></TD><TD ><A HREF = "processors.html">processors</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "quit.html">quit</A></TD><TD ><A HREF = "read_data.html">read_data</A></TD><TD ><A HREF = "read_dump.html">read_dump</A></TD><TD ><A HREF = "read_restart.html">read_restart</A></TD><TD ><A HREF = "region.html">region</A></TD><TD ><A HREF = "replicate.html">replicate</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "rerun.html">rerun</A></TD><TD ><A HREF = "reset_timestep.html">reset_timestep</A></TD><TD ><A HREF = "restart.html">restart</A></TD><TD ><A HREF = "run.html">run</A></TD><TD ><A HREF = "run_style.html">run_style</A></TD><TD ><A HREF = "set.html">set</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "shell.html">shell</A></TD><TD ><A HREF = "special_bonds.html">special_bonds</A></TD><TD ><A HREF = "suffix.html">suffix</A></TD><TD ><A HREF = "tad.html">tad</A></TD><TD ><A HREF = "temper.html">temper</A></TD><TD ><A HREF = "thermo.html">thermo</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "thermo_modify.html">thermo_modify</A></TD><TD ><A HREF = "thermo_style.html">thermo_style</A></TD><TD ><A HREF = "timestep.html">timestep</A></TD><TD ><A HREF = "uncompute.html">uncompute</A></TD><TD ><A HREF = "undump.html">undump</A></TD><TD ><A HREF = "unfix.html">unfix</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "units.html">units</A></TD><TD ><A HREF = "variable.html">variable</A></TD><TD ><A HREF = "velocity.html">velocity</A></TD><TD ><A HREF = "write_data.html">write_data</A></TD><TD ><A HREF = "write_dump.html">write_dump</A></TD><TD ><A HREF = "write_restart.html">write_restart</A> 
+<TR ALIGN="center"><TD ><A HREF = "bond_style.html">bond_style</A></TD><TD ><A HREF = "boundary.html">boundary</A></TD><TD ><A HREF = "box.html">box</A></TD><TD ><A HREF = "change_box.html">change_box</A></TD><TD ><A HREF = "clear.html">clear</A></TD><TD ><A HREF = "comm_modify.html">comm_modify</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "comm_style.html">comm_style</A></TD><TD ><A HREF = "compute.html">compute</A></TD><TD ><A HREF = "compute_modify.html">compute_modify</A></TD><TD ><A HREF = "create_atoms.html">create_atoms</A></TD><TD ><A HREF = "create_box.html">create_box</A></TD><TD ><A HREF = "delete_atoms.html">delete_atoms</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "delete_bonds.html">delete_bonds</A></TD><TD ><A HREF = "dielectric.html">dielectric</A></TD><TD ><A HREF = "dihedral_coeff.html">dihedral_coeff</A></TD><TD ><A HREF = "dihedral_style.html">dihedral_style</A></TD><TD ><A HREF = "dimension.html">dimension</A></TD><TD ><A HREF = "displace_atoms.html">displace_atoms</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "dump.html">dump</A></TD><TD ><A HREF = "dump_image.html">dump image</A></TD><TD ><A HREF = "dump_modify.html">dump_modify</A></TD><TD ><A HREF = "dump_image.html">dump movie</A></TD><TD ><A HREF = "echo.html">echo</A></TD><TD ><A HREF = "fix.html">fix</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "fix_modify.html">fix_modify</A></TD><TD ><A HREF = "group.html">group</A></TD><TD ><A HREF = "if.html">if</A></TD><TD ><A HREF = "improper_coeff.html">improper_coeff</A></TD><TD ><A HREF = "improper_style.html">improper_style</A></TD><TD ><A HREF = "include.html">include</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "jump.html">jump</A></TD><TD ><A HREF = "kspace_modify.html">kspace_modify</A></TD><TD ><A HREF = "kspace_style.html">kspace_style</A></TD><TD ><A HREF = "label.html">label</A></TD><TD ><A HREF = "lattice.html">lattice</A></TD><TD ><A HREF = "log.html">log</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "mass.html">mass</A></TD><TD ><A HREF = "minimize.html">minimize</A></TD><TD ><A HREF = "min_modify.html">min_modify</A></TD><TD ><A HREF = "min_style.html">min_style</A></TD><TD ><A HREF = "molecule.html">molecule</A></TD><TD ><A HREF = "neb.html">neb</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "neigh_modify.html">neigh_modify</A></TD><TD ><A HREF = "neighbor.html">neighbor</A></TD><TD ><A HREF = "newton.html">newton</A></TD><TD ><A HREF = "next.html">next</A></TD><TD ><A HREF = "package.html">package</A></TD><TD ><A HREF = "pair_coeff.html">pair_coeff</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "pair_modify.html">pair_modify</A></TD><TD ><A HREF = "pair_style.html">pair_style</A></TD><TD ><A HREF = "pair_write.html">pair_write</A></TD><TD ><A HREF = "partition.html">partition</A></TD><TD ><A HREF = "prd.html">prd</A></TD><TD ><A HREF = "print.html">print</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "processors.html">processors</A></TD><TD ><A HREF = "quit.html">quit</A></TD><TD ><A HREF = "read_data.html">read_data</A></TD><TD ><A HREF = "read_dump.html">read_dump</A></TD><TD ><A HREF = "read_restart.html">read_restart</A></TD><TD ><A HREF = "region.html">region</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "replicate.html">replicate</A></TD><TD ><A HREF = "rerun.html">rerun</A></TD><TD ><A HREF = "reset_timestep.html">reset_timestep</A></TD><TD ><A HREF = "restart.html">restart</A></TD><TD ><A HREF = "run.html">run</A></TD><TD ><A HREF = "run_style.html">run_style</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "set.html">set</A></TD><TD ><A HREF = "shell.html">shell</A></TD><TD ><A HREF = "special_bonds.html">special_bonds</A></TD><TD ><A HREF = "suffix.html">suffix</A></TD><TD ><A HREF = "tad.html">tad</A></TD><TD ><A HREF = "temper.html">temper</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "thermo.html">thermo</A></TD><TD ><A HREF = "thermo_modify.html">thermo_modify</A></TD><TD ><A HREF = "thermo_style.html">thermo_style</A></TD><TD ><A HREF = "timestep.html">timestep</A></TD><TD ><A HREF = "uncompute.html">uncompute</A></TD><TD ><A HREF = "undump.html">undump</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "unfix.html">unfix</A></TD><TD ><A HREF = "units.html">units</A></TD><TD ><A HREF = "variable.html">variable</A></TD><TD ><A HREF = "velocity.html">velocity</A></TD><TD ><A HREF = "write_data.html">write_data</A></TD><TD ><A HREF = "write_dump.html">write_dump</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "write_restart.html">write_restart</A> 
 </TD></TR></TABLE></DIV>
 
 <P>These are commands contributed by users, which can be used if <A HREF = "Section_start.html#start_3">LAMMPS
diff --git a/doc/Section_commands.txt b/doc/Section_commands.txt
index c9acfcd8f4..133972811f 100644
--- a/doc/Section_commands.txt
+++ b/doc/Section_commands.txt
@@ -305,7 +305,7 @@ Force fields:
 
 Settings:
 
-"communicate"_communicate.html, "group"_group.html, "mass"_mass.html,
+"comm_style"_comm_style.html, "group"_group.html, "mass"_mass.html,
 "min_modify"_min_modify.html, "min_style"_min_style.html,
 "neigh_modify"_neigh_modify.html, "neighbor"_neighbor.html,
 "reset_timestep"_reset_timestep.html, "run_style"_run_style.html,
@@ -367,7 +367,8 @@ in the command's documentation.
 "box"_box.html,
 "change_box"_change_box.html,
 "clear"_clear.html,
-"communicate"_communicate.html,
+"comm_modify"_comm_modify.html,
+"comm_style"_comm_style.html,
 "compute"_compute.html,
 "compute_modify"_compute_modify.html,
 "create_atoms"_create_atoms.html,
diff --git a/doc/balance.html b/doc/balance.html
index 94088e0e01..22ffe25648 100644
--- a/doc/balance.html
+++ b/doc/balance.html
@@ -13,111 +13,178 @@
 </H3>
 <P><B>Syntax:</B>
 </P>
-<PRE>balance keyword args ... 
+<PRE>balance thresh style args keyword value ... 
 </PRE>
-<UL><LI>one or more keyword/arg pairs may be appended 
+<UL><LI>thresh = imbalance threshold that must be exceeded to perform a re-balance 
 
-<LI>keyword = <I>x</I> or <I>y</I> or <I>z</I> or <I>dynamic</I> or <I>out</I> 
+<LI>style = <I>x</I> or <I>y</I> or <I>z</I> or <I>shift</I> or <I>rcb</I> 
 
-<PRE> <I>x</I> args = <I>uniform</I> or Px-1 numbers between 0 and 1
-   <I>uniform</I> = evenly spaced cuts between processors in x dimension
-   numbers = Px-1 ascending values between 0 and 1, Px - # of processors in x dimension
- <I>y</I> args = <I>uniform</I> or Py-1 numbers between 0 and 1
-   <I>uniform</I> = evenly spaced cuts between processors in y dimension
-   numbers = Py-1 ascending values between 0 and 1, Py - # of processors in y dimension
- <I>z</I> args = <I>uniform</I> or Pz-1 numbers between 0 and 1
-   <I>uniform</I> = evenly spaced cuts between processors in z dimension
-   numbers = Pz-1 ascending values between 0 and 1, Pz - # of processors in z dimension
- <I>dynamic</I> args = dimstr Niter thresh
-   dimstr = sequence of letters containing "x" or "y" or "z", each not more than once
-   Niter = # of times to iterate within each dimension of dimstr sequence
-   thresh = stop balancing when this imbalance threshhold is reached
- <I>out</I> arg = filename
-   filename = output file to write each processor's sub-domain to 
+<PRE>  <I>x</I> args = <I>uniform</I> or Px-1 numbers between 0 and 1
+    <I>uniform</I> = evenly spaced cuts between processors in x dimension
+    numbers = Px-1 ascending values between 0 and 1, Px - # of processors in x dimension
+    <I>x</I> can be specified together with <I>y</I> or <I>z</I>
+  <I>y</I> args = <I>uniform</I> or Py-1 numbers between 0 and 1
+    <I>uniform</I> = evenly spaced cuts between processors in y dimension
+    numbers = Py-1 ascending values between 0 and 1, Py - # of processors in y dimension
+    <I>y</I> can be specified together with <I>x</I> or <I>z</I>
+  <I>z</I> args = <I>uniform</I> or Pz-1 numbers between 0 and 1
+    <I>uniform</I> = evenly spaced cuts between processors in z dimension
+    numbers = Pz-1 ascending values between 0 and 1, Pz - # of processors in z dimension
+    <I>z</I> can be specified together with <I>x</I> or <I>y</I>
+  <I>shift</I> args = dimstr Niter stopthresh
+    dimstr = sequence of letters containing "x" or "y" or "z", each not more than once
+    Niter = # of times to iterate within each dimension of dimstr sequence
+    stopthresh = stop balancing when this imbalance threshold is reached
+  <I>rcb</I> args = none 
+</PRE>
+<LI>zero or more keyword/value pairs may be appended 
+
+<LI>keyword = <I>out</I> 
+
+<PRE>  <I>out</I> value = filename
+    filename = write each processor's sub-domain to a file 
 </PRE>
 
 </UL>
 <P><B>Examples:</B>
 </P>
-<PRE>balance x uniform y 0.4 0.5 0.6
-balance dynamic xz 5 1.1
-balance dynamic x 20 1.0 out tmp.balance 
+<PRE>balance 0.9 x uniform y 0.4 0.5 0.6
+balance 1.2 shift xz 5 1.1
+balance 1.0 shift xz 5 1.1
+balance 1.1 rcb
+balance 1.0 shift x 20 1.0 out tmp.balance 
 </PRE>
 <P><B>Description:</B>
 </P>
-<P>This command adjusts the size of processor sub-domains within the
-simulation box, to attempt to balance the number of particles and thus
-the computational cost (load) evenly across processors.  The load
-balancing is "static" in the sense that this command performs the
-balancing once, before or between simulations.  The processor
-sub-domains will then remain static during the subsequent run.  To
-perform "dynamic" balancing, see the <A HREF = "fix_balance.html">fix balance</A>
-command, which can adjust processor sub-domain sizes on-the-fly during
-a <A HREF = "run.html">run</A>.
+<P>IMPORTANT NOTE: The <I>rcb</I> style is not yet implemented.
 </P>
-<P>Load-balancing is only useful if the particles in the simulation box
-have a spatially-varying density distribution.  E.g. a model of a
-vapor/liquid interface, or a solid with an irregular-shaped geometry
-containing void regions.  In this case, the LAMMPS default of dividing
-the simulation box volume into a regular-spaced grid of processor
-sub-domain, with one equal-volume sub-domain per procesor, may assign
-very different numbers of particles per processor.  This can lead to
-poor performance in a scalability sense, when the simulation is run in
+<P>This command adjusts the size and shape of processor sub-domains
+within the simulation box, to attempt to balance the number of
+particles and thus the computational cost (load) evenly across
+processors.  The load balancing is "static" in the sense that this
+command performs the balancing once, before or between simulations.
+The processor sub-domains will then remain static during the
+subsequent run.  To perform "dynamic" balancing, see the <A HREF = "fix_balance.html">fix
+balance</A> command, which can adjust processor
+sub-domain sizes and shapes on-the-fly during a <A HREF = "run.html">run</A>.
+</P>
+<P>Load-balancing is typically only useful if the particles in the
+simulation box have a spatially-varying density distribution.  E.g. a
+model of a vapor/liquid interface, or a solid with an irregular-shaped
+geometry containing void regions.  In this case, the LAMMPS default of
+dividing the simulation box volume into a regular-spaced grid of 3d
+bricks, with one equal-volume sub-domain per processor, may assign very
+different numbers of particles per processor.  This can lead to poor
+performance in a scalability sense, when the simulation is run in
 parallel.
 </P>
-<P>Note that the <A HREF = "processors.html">processors</A> command gives you control
+<P>Note that the <A HREF = "processors.html">processors</A> command allows some control
 over how the box volume is split across processors.  Specifically, for
-a Px by Py by Pz grid of processors, it chooses or lets you choose Px,
-Py, and Pz, subject to the constraint that Px * Py * Pz = P, the total
-number of processors.  This is sufficient to achieve good load-balance
-for many models on many processor counts.  However, all the processor
-sub-domains will still be the same shape and have the same volume.
+a Px by Py by Pz grid of processors, it allows choice of Px, Py, and
+Pz, subject to the constraint that Px * Py * Pz = P, the total number
+of processors.  This is sufficient to achieve good load-balance for
+many models on many processor counts.  However, all the processor
+sub-domains will still have the same shape and same volume.
 </P>
-<P>This command does not alter the topology of the Px by Py by Pz grid or
-processors.  But it shifts the cutting planes between processors (in
-3d, or lines in 2d), which adjusts the volume (area in 2d) assigned to
-each processor, as in the following 2d diagram.  The left diagram is
-the default partitioning of the simulation box across processors (one
-sub-box for each of 16 processors); the right diagram is after
-balancing.
-</P>
-<CENTER><IMG SRC = "JPG/balance.jpg">
-</CENTER>
-<P>When the balance command completes, it prints out the final positions
-of all cutting planes in each of the 3 dimensions (as fractions of the
-box length).  It also prints statistics about its results, including
-the change in "imbalance factor".  This factor is defined as the
-maximum number of particles owned by any processor, divided by the
+<P>The requested load-balancing operation is only performed if the
+current "imbalance factor" in particles owned by each processor
+exceeds the specified <I>thresh</I> parameter.  This factor is defined as
+the maximum number of particles owned by any processor, divided by the
 average number of particles per processor.  Thus an imbalance factor
 of 1.0 is perfect balance.  For 10000 particles running on 10
 processors, if the most heavily loaded processor has 1200 particles,
-then the factor is 1.2, meaning there is a 20% imbalance.  The change
-in the maximum number of particles (on any processor) is also printed.
+then the factor is 1.2, meaning there is a 20% imbalance.  Note that a
+re-balance can be forced even if the current balance is perfect (1.0)
+by specifying a <I>thresh</I> &lt; 1.0.
+</P>
+<P>When the balance command completes, it prints statistics about its
+results, including the change in the imbalance factor and the change
+in the maximum number of particles (on any processor).  For "grid"
+methods (defined below) that create a logical 3d grid of processors,
+the positions of all cutting planes in each of the 3 dimensions (as
+fractions of the box length) are also printed.
 </P>
 <P>IMPORTANT NOTE: This command attempts to minimize the imbalance
-factor, as defined above.  But because of the topology constraint that
-only the cutting planes (lines) between processors are moved, there
-are many irregular distributions of particles, where this factor
-cannot be shrunk to 1.0, particuarly in 3d.  Also, computational cost
-is not strictly proportional to particle count, and changing the
-relative size and shape of processor sub-domains may lead to
-additional computational and communication overheads, e.g. in the PPPM
-solver used via the <A HREF = "kspace_style.html">kspace_style</A> command.  Thus
-you should benchmark the run times of your simulation before and after
-balancing.
+factor, as defined above.  But depending on the method a perfect
+balance (1.0) may not be achieved.  For example, "grid" methods
+(defined below) that create a logical 3d grid cannot achieve perfect
+balance for many irregular distributions of particles.  Likewise, if a
+portion of the system is a perfect lattice, e.g. the initial system is
+generated by the <A HREF = "create_atoms.html">create_atoms</A> command, then "grid"
+methods may be unable to achieve exact balance.  This is because
+entire lattice planes will be owned or not owned by a single
+processor.
+</P>
+<P>IMPORTANT NOTE: Computational cost is not strictly proportional to
+particle count, and changing the relative size and shape of processor
+sub-domains may lead to additional computational and communication
+overheads, e.g. in the PPPM solver used via the
+<A HREF = "kspace_style.html">kspace_style</A> command.  Thus you should benchmark
+the run times of a simulation before and after balancing.
 </P>
 <HR>
 
-<P>The <I>x</I>, <I>y</I>, and <I>z</I> keywords adjust the position of cutting planes
-between processor sub-domains in a specific dimension.  The <I>uniform</I>
-argument spaces the planes evenly, as in the left diagram above.  The
-<I>numeric</I> argument requires you to list Ps-1 numbers that specify the
-position of the cutting planes.  This requires that you know Ps = Px
-or Py or Pz = the number of processors assigned by LAMMPS to the
-relevant dimension.  This assignment is made (and the Px, Py, Pz
-values printed out) when the simulation box is created by the
-"create_box" or "read_data" or "read_restart" command and is
-influenced by the settings of the "processors" command.
+<P>The method used to perform a load balance is specified by one of the
+listed styles, which are described in detail below.  There are 2 kinds
+of styles.
+</P>
+<P>The <I>x</I>, <I>y</I>, <I>z</I>, and <I>shift</I> styles are "grid" methods which produce
+a logical 3d grid of processors.  They operate by changing the cutting
+planes (or lines) between processors in 3d (or 2d), to adjust the
+volume (area in 2d) assigned to each processor, as in the following 2d
+diagram.  The left diagram is the default partitioning of the
+simulation box across processors (one sub-box for each of 16
+processors); the right diagram is after balancing.
+</P>
+<CENTER><IMG SRC = "JPG/balance.jpg">
+</CENTER>
+<P>The <I>rcb</I> style is a "tiling" method which does not produce a logical
+3d grid of processors.  Rather it tiles the simulation domain with
+rectangular sub-boxes of varying size and shape in an irregular
+fashion so as to have equal numbers of particles in each sub-box, as
+in the following 2d diagram.  Again the left diagram is the default
+partitioning of the simulation box across processors (one sub-box for
+each of 16 processors); the right diagram is after balancing.
+</P>
+<P>NOTE: Need a diagram of RCB partitioning.
+</P>
+<P>The "grid" methods can be used with either of the
+<A HREF = "comm_style.html">comm_style</A> command options, <I>brick</I> or <I>tiled</I>.  The
+"tiling" methods can only be used with <A HREF = "comm_style.html">comm_style
+tiled</A>.  Note that it can be useful to use a "grid"
+method with <A HREF = "comm_style.html">comm_style tiled</A> to return the domain
+partitioning to a logical 3d grid of processors so that "comm_style
+brick" can be used for subsequent <A HREF = "run.html">run</A> commands.
+</P>
+<P>When a "grid" method is specified, the current domain partitioning can
+be either a logical 3d grid or a tiled partitioning.  In the former
+case, the current logical 3d grid is used as a starting point and
+changes are made to improve the imbalance factor.  In the latter case,
+the tiled partitioning is discarded and a logical 3d grid is created
+with uniform spacing in all dimensions.  This becomes the starting
+point for the balancing operation.
+</P>
+<P>When a "tiling" method is specified, the current domain partitioning
+("grid" or "tiled") is ignored, and a new partitioning is computed
+from scratch.
+</P>
+<HR>
+
+<P>The <I>x</I>, <I>y</I>, and <I>z</I> styles invoke a "grid" method for balancing, as
+described above.  Note that any or all of these 3 styles can be
+specified together, one after the other.  These styles adjust the
+position of cutting planes between processor sub-domains in specific
+dimensions.  Only the specified dimensions are altered.
+</P>
+<P>The <I>uniform</I> argument spaces the planes evenly, as in the left
+diagrams above.  The <I>numeric</I> argument requires listing Ps-1 numbers
+that specify the position of the cutting planes.  This requires
+knowing Ps = Px or Py or Pz = the number of processors assigned by
+LAMMPS to the relevant dimension.  This assignment is made (and the
+Px, Py, Pz values printed out) when the simulation box is created by
+the "create_box" or "read_data" or "read_restart" command and is
+influenced by the settings of the <A HREF = "processors.html">processors</A>
+command.
 </P>
 <P>Each of the numeric values must be between 0 and 1, and they must be
 listed in ascending order.  They represent the fractional position of
@@ -130,12 +197,11 @@ larger than the right processor's sub-domain.
 </P>
 <HR>
 
-<P>The <I>dynamic</I> keyword changes the cutting planes between processors in
-an iterative fashion, seeking to reduce the imbalance factor, similar
-to how the <A HREF = "fix_balance.html">fix balance</A> command operates.  Note that
-this keyword begins its operation from the current processor
-partitioning, which could be uniform or the result of a previous
-balance command.
+<P>The <I>shift</I> style invokes a "grid" method for balancing, as
+described above.  It changes the positions of cutting planes between
+processors in an iterative fashion, seeking to reduce the imbalance
+factor, similar to how the <A HREF = "fix_balance.html">fix balance shift</A>
+command operates.
 </P>
 <P>The <I>dimstr</I> argument is a string of characters, each of which must be
 an "x" or "y" or "z".  Eacn character can appear zero or one time,
@@ -147,14 +213,14 @@ to be a density variation in the particles.
 dimensions listed in <I>dimstr</I>, one dimension at a time.  For a single
 dimension, the balancing operation (described below) is iterated on up
 to <I>Niter</I> times.  After each dimension finishes, the imbalance factor
-is re-computed, and the balancing operation halts if the <I>thresh</I>
+is re-computed, and the balancing operation halts if the <I>stopthresh</I>
 criterion is met.
 </P>
 <P>A rebalance operation in a single dimension is performed using a
 recursive multisectioning algorithm, where the position of each
 cutting plane (line in 2d) in the dimension is adjusted independently.
-This is similar to a recursive bisectioning (RCB) for a single value,
-except that the bounds used for each bisectioning take advantage of
+This is similar to a recursive bisectioning for a single value, except
+that the bounds used for each bisectioning take advantage of
 information from neighboring cuts if possible.  At each iteration, the
 count of particles on either side of each plane is tallied.  If the
 counts do not match the target value for the plane, the position of
@@ -168,26 +234,27 @@ plane gets closer to the target value.
 assigned, particles are migrated to their new owning processor, and
 the balance procedure ends.
 </P>
-<P>IMPORTANT NOTE: At each rebalance operation, the RCB for each cutting
-plane (line in 2d) typcially starts with low and high bounds separated
-by the extent of a processor's sub-domain in one dimension.  The size
-of this bracketing region shrinks by 1/2 every iteration.  Thus if
-<I>Niter</I> is specified as 10, the cutting plane will typically be
-positioned to 1 part in 1000 accuracy (relative to the perfect target
-position).  For <I>Niter</I> = 20, it will be accurate to 1 part in a
-million.  Tus there is no need ot set <I>Niter</I> to a large value.
+<P>IMPORTANT NOTE: At each rebalance operation, the bisectioning for each
+cutting plane (line in 2d) typically starts with low and high bounds
+separated by the extent of a processor's sub-domain in one dimension.
+The size of this bracketing region shrinks by 1/2 every iteration.
+Thus if <I>Niter</I> is specified as 10, the cutting plane will typically
+be positioned to 1 part in 1000 accuracy (relative to the perfect
+target position).  For <I>Niter</I> = 20, it will be accurate to 1 part in
+a million.  Thus there is no need to set <I>Niter</I> to a large value.
 LAMMPS will check if the threshold accuracy is reached (in a
 dimension) is less iterations than <I>Niter</I> and exit early.  However,
 <I>Niter</I> should also not be set too small, since it will take roughly
 the same number of iterations to converge even if the cutting plane is
 initially close to the target value.
 </P>
-<P>IMPORTANT NOTE: If a portion of your system is a perfect lattice,
-e.g. the intiial system is generated by the
-<A HREF = "create_atoms.html">create_atoms</A> command, then the balancer may be
-unable to achieve exact balance.  I.e. entire lattice planes will be
-owned or not owned by a single processor.  So you you should not
-expect to achieve perfect balance in this case.
+<HR>
+
+<P>The <I>rcb</I> style invokes a "tiling" method for balancing, as described
+above.  It performs a recursive coordinate bisectioning (RCB) of the
+simulation domain.
+</P>
+<P>Need further description of RCB.
 </P>
 <HR>
 
@@ -242,11 +309,8 @@ only 10 unique vertices in total.
 
 <P><B>Restrictions:</B>
 </P>
-<P>The <I>dynamic</I> keyword cannot be used with the <I>x</I>, <I>y</I>, or <I>z</I>
-arguments.
-</P>
-<P>For 2d simulations, the <I>z</I> keyword cannot be used.  Nor can a "z"
-appear in <I>dimstr</I> for the <I>dynamic</I> keyword.
+<P>For 2d simulations, the <I>z</I> style cannot be used.  Nor can a "z"
+appear in <I>dimstr</I> for the <I>shift</I> style.
 </P>
 <P><B>Related commands:</B>
 </P>
diff --git a/doc/balance.txt b/doc/balance.txt
index 3e21783528..1226c7111e 100644
--- a/doc/balance.txt
+++ b/doc/balance.txt
@@ -10,108 +10,172 @@ balance command :h3
 
 [Syntax:]
 
-balance keyword args ... :pre
+balance thresh style args keyword value ... :pre
 
-one or more keyword/arg pairs may be appended :ulb,l
-keyword = {x} or {y} or {z} or {dynamic} or {out} :l
- {x} args = {uniform} or Px-1 numbers between 0 and 1
-   {uniform} = evenly spaced cuts between processors in x dimension
-   numbers = Px-1 ascending values between 0 and 1, Px - # of processors in x dimension
- {y} args = {uniform} or Py-1 numbers between 0 and 1
-   {uniform} = evenly spaced cuts between processors in y dimension
-   numbers = Py-1 ascending values between 0 and 1, Py - # of processors in y dimension
- {z} args = {uniform} or Pz-1 numbers between 0 and 1
-   {uniform} = evenly spaced cuts between processors in z dimension
-   numbers = Pz-1 ascending values between 0 and 1, Pz - # of processors in z dimension
- {dynamic} args = dimstr Niter thresh
-   dimstr = sequence of letters containing "x" or "y" or "z", each not more than once
-   Niter = # of times to iterate within each dimension of dimstr sequence
-   thresh = stop balancing when this imbalance threshhold is reached
- {out} arg = filename
-   filename = output file to write each processor's sub-domain to :pre
+thresh = imbalance threshold that must be exceeded to perform a re-balance :ulb,l
+style = {x} or {y} or {z} or {shift} or {rcb} :l
+  {x} args = {uniform} or Px-1 numbers between 0 and 1
+    {uniform} = evenly spaced cuts between processors in x dimension
+    numbers = Px-1 ascending values between 0 and 1, Px - # of processors in x dimension
+    {x} can be specified together with {y} or {z}
+  {y} args = {uniform} or Py-1 numbers between 0 and 1
+    {uniform} = evenly spaced cuts between processors in y dimension
+    numbers = Py-1 ascending values between 0 and 1, Py - # of processors in y dimension
+    {y} can be specified together with {x} or {z}
+  {z} args = {uniform} or Pz-1 numbers between 0 and 1
+    {uniform} = evenly spaced cuts between processors in z dimension
+    numbers = Pz-1 ascending values between 0 and 1, Pz - # of processors in z dimension
+    {z} can be specified together with {x} or {y}
+  {shift} args = dimstr Niter stopthresh
+    dimstr = sequence of letters containing "x" or "y" or "z", each not more than once
+    Niter = # of times to iterate within each dimension of dimstr sequence
+    stopthresh = stop balancing when this imbalance threshold is reached
+  {rcb} args = none :pre
+zero or more keyword/value pairs may be appended :l
+keyword = {out} :l
+  {out} value = filename
+    filename = write each processor's sub-domain to a file :pre
 :ule
 
 [Examples:]
 
-balance x uniform y 0.4 0.5 0.6
-balance dynamic xz 5 1.1
-balance dynamic x 20 1.0 out tmp.balance :pre
+balance 0.9 x uniform y 0.4 0.5 0.6
+balance 1.2 shift xz 5 1.1
+balance 1.0 shift xz 5 1.1
+balance 1.1 rcb
+balance 1.0 shift x 20 1.0 out tmp.balance :pre
 
 [Description:]
 
-This command adjusts the size of processor sub-domains within the
-simulation box, to attempt to balance the number of particles and thus
-the computational cost (load) evenly across processors.  The load
-balancing is "static" in the sense that this command performs the
-balancing once, before or between simulations.  The processor
-sub-domains will then remain static during the subsequent run.  To
-perform "dynamic" balancing, see the "fix balance"_fix_balance.html
-command, which can adjust processor sub-domain sizes on-the-fly during
-a "run"_run.html.
+IMPORTANT NOTE: The {rcb} style is not yet implemented.
 
-Load-balancing is only useful if the particles in the simulation box
-have a spatially-varying density distribution.  E.g. a model of a
-vapor/liquid interface, or a solid with an irregular-shaped geometry
-containing void regions.  In this case, the LAMMPS default of dividing
-the simulation box volume into a regular-spaced grid of processor
-sub-domain, with one equal-volume sub-domain per procesor, may assign
-very different numbers of particles per processor.  This can lead to
-poor performance in a scalability sense, when the simulation is run in
+This command adjusts the size and shape of processor sub-domains
+within the simulation box, to attempt to balance the number of
+particles and thus the computational cost (load) evenly across
+processors.  The load balancing is "static" in the sense that this
+command performs the balancing once, before or between simulations.
+The processor sub-domains will then remain static during the
+subsequent run.  To perform "dynamic" balancing, see the "fix
+balance"_fix_balance.html command, which can adjust processor
+sub-domain sizes and shapes on-the-fly during a "run"_run.html.
+
+Load-balancing is typically only useful if the particles in the
+simulation box have a spatially-varying density distribution.  E.g. a
+model of a vapor/liquid interface, or a solid with an irregular-shaped
+geometry containing void regions.  In this case, the LAMMPS default of
+dividing the simulation box volume into a regular-spaced grid of 3d
+bricks, with one equal-volume sub-domain per processor, may assign very
+different numbers of particles per processor.  This can lead to poor
+performance in a scalability sense, when the simulation is run in
 parallel.
 
-Note that the "processors"_processors.html command gives you control
+Note that the "processors"_processors.html command allows some control
 over how the box volume is split across processors.  Specifically, for
-a Px by Py by Pz grid of processors, it chooses or lets you choose Px,
-Py, and Pz, subject to the constraint that Px * Py * Pz = P, the total
-number of processors.  This is sufficient to achieve good load-balance
-for many models on many processor counts.  However, all the processor
-sub-domains will still be the same shape and have the same volume.
+a Px by Py by Pz grid of processors, it allows choice of Px, Py, and
+Pz, subject to the constraint that Px * Py * Pz = P, the total number
+of processors.  This is sufficient to achieve good load-balance for
+many models on many processor counts.  However, all the processor
+sub-domains will still have the same shape and same volume.
 
-This command does not alter the topology of the Px by Py by Pz grid or
-processors.  But it shifts the cutting planes between processors (in
-3d, or lines in 2d), which adjusts the volume (area in 2d) assigned to
-each processor, as in the following 2d diagram.  The left diagram is
-the default partitioning of the simulation box across processors (one
-sub-box for each of 16 processors); the right diagram is after
-balancing.
-
-:c,image(JPG/balance.jpg)
-
-When the balance command completes, it prints out the final positions
-of all cutting planes in each of the 3 dimensions (as fractions of the
-box length).  It also prints statistics about its results, including
-the change in "imbalance factor".  This factor is defined as the
-maximum number of particles owned by any processor, divided by the
+The requested load-balancing operation is only performed if the
+current "imbalance factor" in particles owned by each processor
+exceeds the specified {thresh} parameter.  This factor is defined as
+the maximum number of particles owned by any processor, divided by the
 average number of particles per processor.  Thus an imbalance factor
 of 1.0 is perfect balance.  For 10000 particles running on 10
 processors, if the most heavily loaded processor has 1200 particles,
-then the factor is 1.2, meaning there is a 20% imbalance.  The change
-in the maximum number of particles (on any processor) is also printed.
+then the factor is 1.2, meaning there is a 20% imbalance.  Note that a
+re-balance can be forced even if the current balance is perfect (1.0)
+by specifying a {thresh} < 1.0.
+
+When the balance command completes, it prints statistics about its
+results, including the change in the imbalance factor and the change
+in the maximum number of particles (on any processor).  For "grid"
+methods (defined below) that create a logical 3d grid of processors,
+the positions of all cutting planes in each of the 3 dimensions (as
+fractions of the box length) are also printed.
 
 IMPORTANT NOTE: This command attempts to minimize the imbalance
-factor, as defined above.  But because of the topology constraint that
-only the cutting planes (lines) between processors are moved, there
-are many irregular distributions of particles, where this factor
-cannot be shrunk to 1.0, particuarly in 3d.  Also, computational cost
-is not strictly proportional to particle count, and changing the
-relative size and shape of processor sub-domains may lead to
-additional computational and communication overheads, e.g. in the PPPM
-solver used via the "kspace_style"_kspace_style.html command.  Thus
-you should benchmark the run times of your simulation before and after
-balancing.
+factor, as defined above.  But depending on the method a perfect
+balance (1.0) may not be achieved.  For example, "grid" methods
+(defined below) that create a logical 3d grid cannot achieve perfect
+balance for many irregular distributions of particles.  Likewise, if a
+portion of the system is a perfect lattice, e.g. the initial system is
+generated by the "create_atoms"_create_atoms.html command, then "grid"
+methods may be unable to achieve exact balance.  This is because
+entire lattice planes will be owned or not owned by a single
+processor.
+
+IMPORTANT NOTE: Computational cost is not strictly proportional to
+particle count, and changing the relative size and shape of processor
+sub-domains may lead to additional computational and communication
+overheads, e.g. in the PPPM solver used via the
+"kspace_style"_kspace_style.html command.  Thus you should benchmark
+the run times of a simulation before and after balancing.
 
 :line
 
-The {x}, {y}, and {z} keywords adjust the position of cutting planes
-between processor sub-domains in a specific dimension.  The {uniform}
-argument spaces the planes evenly, as in the left diagram above.  The
-{numeric} argument requires you to list Ps-1 numbers that specify the
-position of the cutting planes.  This requires that you know Ps = Px
-or Py or Pz = the number of processors assigned by LAMMPS to the
-relevant dimension.  This assignment is made (and the Px, Py, Pz
-values printed out) when the simulation box is created by the
-"create_box" or "read_data" or "read_restart" command and is
-influenced by the settings of the "processors" command.
+The method used to perform a load balance is specified by one of the
+listed styles, which are described in detail below.  There are 2 kinds
+of styles.
+
+The {x}, {y}, {z}, and {shift} styles are "grid" methods which produce
+a logical 3d grid of processors.  They operate by changing the cutting
+planes (or lines) between processors in 3d (or 2d), to adjust the
+volume (area in 2d) assigned to each processor, as in the following 2d
+diagram.  The left diagram is the default partitioning of the
+simulation box across processors (one sub-box for each of 16
+processors); the right diagram is after balancing.
+
+:c,image(JPG/balance.jpg)
+
+The {rcb} style is a "tiling" method which does not produce a logical
+3d grid of processors.  Rather it tiles the simulation domain with
+rectangular sub-boxes of varying size and shape in an irregular
+fashion so as to have equal numbers of particles in each sub-box, as
+in the following 2d diagram.  Again the left diagram is the default
+partitioning of the simulation box across processors (one sub-box for
+each of 16 processors); the right diagram is after balancing.
+
+NOTE: Need a diagram of RCB partitioning.
+
+The "grid" methods can be used with either of the
+"comm_style"_comm_style.html command options, {brick} or {tiled}.  The
+"tiling" methods can only be used with "comm_style
+tiled"_comm_style.html.  Note that it can be useful to use a "grid"
+method with "comm_style tiled"_comm_style.html to return the domain
+partitioning to a logical 3d grid of processors so that "comm_style
+brick" can be used for subsequent "run"_run.html commands.
+
+When a "grid" method is specified, the current domain partitioning can
+be either a logical 3d grid or a tiled partitioning.  In the former
+case, the current logical 3d grid is used as a starting point and
+changes are made to improve the imbalance factor.  In the latter case,
+the tiled partitioning is discarded and a logical 3d grid is created
+with uniform spacing in all dimensions.  This becomes the starting
+point for the balancing operation.
+
+When a "tiling" method is specified, the current domain partitioning
+("grid" or "tiled") is ignored, and a new partitioning is computed
+from scratch.
+
+:line
+
+The {x}, {y}, and {z} styles invoke a "grid" method for balancing, as
+described above.  Note that any or all of these 3 styles can be
+specified together, one after the other.  These styles adjust the
+position of cutting planes between processor sub-domains in specific
+dimensions.  Only the specified dimensions are altered.
+
+The {uniform} argument spaces the planes evenly, as in the left
+diagrams above.  The {numeric} argument requires listing Ps-1 numbers
+that specify the position of the cutting planes.  This requires
+knowing Ps = Px or Py or Pz = the number of processors assigned by
+LAMMPS to the relevant dimension.  This assignment is made (and the
+Px, Py, Pz values printed out) when the simulation box is created by
+the "create_box" or "read_data" or "read_restart" command and is
+influenced by the settings of the "processors"_processors.html
+command.
 
 Each of the numeric values must be between 0 and 1, and they must be
 listed in ascending order.  They represent the fractional position of
@@ -124,12 +188,11 @@ larger than the right processor's sub-domain.
 
 :line
 
-The {dynamic} keyword changes the cutting planes between processors in
-an iterative fashion, seeking to reduce the imbalance factor, similar
-to how the "fix balance"_fix_balance.html command operates.  Note that
-this keyword begins its operation from the current processor
-partitioning, which could be uniform or the result of a previous
-balance command.
+The {shift} style invokes a "grid" method for balancing, as
+described above.  It changes the positions of cutting planes between
+processors in an iterative fashion, seeking to reduce the imbalance
+factor, similar to how the "fix balance shift"_fix_balance.html
+command operates.
 
 The {dimstr} argument is a string of characters, each of which must be
 an "x" or "y" or "z".  Eacn character can appear zero or one time,
@@ -141,14 +204,14 @@ Balancing proceeds by adjusting the cutting planes in each of the
 dimensions listed in {dimstr}, one dimension at a time.  For a single
 dimension, the balancing operation (described below) is iterated on up
 to {Niter} times.  After each dimension finishes, the imbalance factor
-is re-computed, and the balancing operation halts if the {thresh}
+is re-computed, and the balancing operation halts if the {stopthresh}
 criterion is met.
 
 A rebalance operation in a single dimension is performed using a
 recursive multisectioning algorithm, where the position of each
 cutting plane (line in 2d) in the dimension is adjusted independently.
-This is similar to a recursive bisectioning (RCB) for a single value,
-except that the bounds used for each bisectioning take advantage of
+This is similar to a recursive bisectioning for a single value, except
+that the bounds used for each bisectioning take advantage of
 information from neighboring cuts if possible.  At each iteration, the
 count of particles on either side of each plane is tallied.  If the
 counts do not match the target value for the plane, the position of
@@ -162,26 +225,27 @@ Once the rebalancing is complete and final processor sub-domains
 assigned, particles are migrated to their new owning processor, and
 the balance procedure ends.
 
-IMPORTANT NOTE: At each rebalance operation, the RCB for each cutting
-plane (line in 2d) typcially starts with low and high bounds separated
-by the extent of a processor's sub-domain in one dimension.  The size
-of this bracketing region shrinks by 1/2 every iteration.  Thus if
-{Niter} is specified as 10, the cutting plane will typically be
-positioned to 1 part in 1000 accuracy (relative to the perfect target
-position).  For {Niter} = 20, it will be accurate to 1 part in a
-million.  Tus there is no need ot set {Niter} to a large value.
+IMPORTANT NOTE: At each rebalance operation, the bisectioning for each
+cutting plane (line in 2d) typically starts with low and high bounds
+separated by the extent of a processor's sub-domain in one dimension.
+The size of this bracketing region shrinks by 1/2 every iteration.
+Thus if {Niter} is specified as 10, the cutting plane will typically
+be positioned to 1 part in 1000 accuracy (relative to the perfect
+target position).  For {Niter} = 20, it will be accurate to 1 part in
+a million.  Thus there is no need to set {Niter} to a large value.
 LAMMPS will check if the threshold accuracy is reached (in a
 dimension) is less iterations than {Niter} and exit early.  However,
 {Niter} should also not be set too small, since it will take roughly
 the same number of iterations to converge even if the cutting plane is
 initially close to the target value.
 
-IMPORTANT NOTE: If a portion of your system is a perfect lattice,
-e.g. the intiial system is generated by the
-"create_atoms"_create_atoms.html command, then the balancer may be
-unable to achieve exact balance.  I.e. entire lattice planes will be
-owned or not owned by a single processor.  So you you should not
-expect to achieve perfect balance in this case.
+:line
+
+The {rcb} style invokes a "tiling" method for balancing, as described
+above.  It performs a recursive coordinate bisectioning (RCB) of the
+simulation domain.
+
+Need further description of RCB.
 
 :line
 
@@ -236,11 +300,8 @@ For a 3d problem, the syntax is similar with "SQUARES" replaced by
 
 [Restrictions:]
 
-The {dynamic} keyword cannot be used with the {x}, {y}, or {z}
-arguments.
-
-For 2d simulations, the {z} keyword cannot be used.  Nor can a "z"
-appear in {dimstr} for the {dynamic} keyword.
+For 2d simulations, the {z} style cannot be used.  Nor can a "z"
+appear in {dimstr} for the {shift} style.
 
 [Related commands:]
 
diff --git a/doc/communicate.html b/doc/comm_modify.html
similarity index 73%
rename from doc/communicate.html
rename to doc/comm_modify.html
index b69457e120..d8819a8c3d 100644
--- a/doc/communicate.html
+++ b/doc/comm_modify.html
@@ -9,19 +9,18 @@
 
 <HR>
 
-<H3>communicate command 
+<H3>comm_modify command 
 </H3>
 <P><B>Syntax:</B>
 </P>
-<PRE>communicate style keyword value ... 
+<PRE>comm_modify keyword value ... 
 </PRE>
-<UL><LI>style = <I>single</I> or <I>multi</I> 
+<UL><LI>zero or more keyword/value pairs may be appended 
 
-<LI>zero or more keyword/value pairs may be appended 
+<LI>keyword = <I>mode</I> or <I>cutoff</I> or <I>group</I> or <I>vel</I> 
 
-<LI>keyword = <I>cutoff</I> or <I>group</I> or <I>vel</I> 
-
-<PRE>  <I>cutoff</I> value = Rcut (distance units) = communicate atoms from this far away
+<PRE>  <I>mode</I> value = <I>single</I> or <I>multi</I> = communicate atoms within a single or multiple distances
+  <I>cutoff</I> value = Rcut (distance units) = communicate atoms from this far away
   <I>group</I> value = group-ID = only communicate atoms in the group
   <I>vel</I> value = <I>yes</I> or <I>no</I> = do or do not communicate velocity info with ghost atoms 
 </PRE>
@@ -29,32 +28,42 @@
 </UL>
 <P><B>Examples:</B>
 </P>
-<PRE>communicate multi
-communicate multi group solvent
-communicate single vel yes
-communicate single cutoff 5.0 vel yes 
+<PRE>comm_modify mode multi
+comm_modify mode multi group solvent
+comm_modify vel yes
+comm_modify cutoff 5.0 vel yes 
 </PRE>
 <P><B>Description:</B>
 </P>
-<P>This command sets the style of inter-processor communication that
-occurs each timestep as atom coordinates and other properties are
-exchanged between neighboring processors and stored as properties of
-ghost atoms.
+<P>This command sets parameters that affect the inter-processor
+communication of atom information that occurs each timestep as
+coordinates and other properties are exchanged between neighboring
+processors and stored as properties of ghost atoms.
 </P>
-<P>The default style is <I>single</I> which means each processor acquires
+<P>IMPORTANT NOTE: These options apply to the currently defined comm
+style.  When you specify a <A HREF = "comm_style.html">comm_style</A> command, all
+communication settings are restored to their default values, including
+those previously reset by a comm_modify command.  Thus if your input
+script specifies a comm_style command, you should use the comm_modify
+command after it.
+</P>
+<P>The <I>mode</I> keyword determines whether a single cutoff distance or
+multiple cutoff distances are used to determine which atoms to communicate.
+</P>
+<P>The default mode is <I>single</I> which means each processor acquires
 information for ghost atoms that are within a single distance from its
 sub-domain.  The distance is the maximum of the neighbor cutoff for
 all atom type pairs.
 </P>
 <P>For many systems this is an efficient algorithm, but for systems with
-widely varying cutoffs for different type pairs, the <I>multi</I> style can
+widely varying cutoffs for different type pairs, the <I>multi</I> mode can
 be faster.  In this case, each atom type is assigned its own distance
 cutoff for communication purposes, and fewer atoms will be
 communicated.  See the <A HREF = "neighbor.html">neighbor multi</A> command for a
 neighbor list construction option that may also be beneficial for
 simulations of this kind.
 </P>
-<P>The <I>cutoff</I> option allows you to set a ghost cutoff distance, which
+<P>The <I>cutoff</I> keyword allows you to set a ghost cutoff distance, which
 is the distance from the borders of a processor's sub-domain at which
 ghost atoms are acquired from other processors.  By default the ghost
 cutoff = neighbor cutoff = pairwise force cutoff + neighbor skin.  See
@@ -105,14 +114,14 @@ will typically lead to bad dynamics (i.e. the bond length is now the
 simulation box length).  To detect if this is happening, see the
 <A HREF = "neigh_modify.html">neigh_modify cluster</A> command.
 </P>
-<P>The <I>group</I> option will limit communication to atoms in the specified
+<P>The <I>group</I> keyword will limit communication to atoms in the specified
 group.  This can be useful for models where no ghost atoms are needed
 for some kinds of particles.  All atoms (not just those in the
 specified group) will still migrate to new processors as they move.
 The group specified with this option must also be specified via the
 <A HREF = "atom_modify.html">atom_modify first</A> command.
 </P>
-<P>The <I>vel</I> option enables velocity information to be communicated with
+<P>The <I>vel</I> keyword enables velocity information to be communicated with
 ghost particles.  Depending on the <A HREF = "atom_style.html">atom_style</A>,
 velocity info includes the translational velocity, angular velocity,
 and angular momentum of a particle.  If the <I>vel</I> option is set to
@@ -131,12 +140,12 @@ that boundary (e.g. due to dilation or shear).
 </P>
 <P><B>Related commands:</B>
 </P>
-<P><A HREF = "neighbor.html">neighbor</A>
+<P><A HREF = "comm_style.html">comm_style</A>, <A HREF = "neighbor.html">neighbor</A>
 </P>
 <P><B>Default:</B>
 </P>
-<P>The default settings are style = single, group = all, cutoff = 0.0,
-vel = no.  The cutoff default of 0.0 means that ghost cutoff =
-neighbor cutoff = pairwise force cutoff + neighbor skin.
+<P>The option defaults are mode = single, group = all, cutoff = 0.0, vel =
+no.  The cutoff default of 0.0 means that ghost cutoff = neighbor
+cutoff = pairwise force cutoff + neighbor skin.
 </P>
 </HTML>
diff --git a/doc/communicate.txt b/doc/comm_modify.txt
similarity index 73%
rename from doc/communicate.txt
rename to doc/comm_modify.txt
index 82c8487e70..7a8c2236b8 100644
--- a/doc/communicate.txt
+++ b/doc/comm_modify.txt
@@ -6,15 +6,15 @@
 
 :line
 
-communicate command :h3
+comm_modify command :h3
 
 [Syntax:]
 
-communicate style keyword value ... :pre
+comm_modify keyword value ... :pre
 
-style = {single} or {multi} :ulb,l
-zero or more keyword/value pairs may be appended :l
-keyword = {cutoff} or {group} or {vel} :l
+zero or more keyword/value pairs may be appended :ulb,l
+keyword = {mode} or {cutoff} or {group} or {vel} :l
+  {mode} value = {single} or {multi} = communicate atoms within a single or multiple distances
   {cutoff} value = Rcut (distance units) = communicate atoms from this far away
   {group} value = group-ID = only communicate atoms in the group
   {vel} value = {yes} or {no} = do or do not communicate velocity info with ghost atoms :pre
@@ -22,32 +22,42 @@ keyword = {cutoff} or {group} or {vel} :l
 
 [Examples:]
 
-communicate multi
-communicate multi group solvent
-communicate single vel yes
-communicate single cutoff 5.0 vel yes :pre
+comm_modify mode multi
+comm_modify mode multi group solvent
+comm_modify vel yes
+comm_modify cutoff 5.0 vel yes :pre
 
 [Description:]
 
-This command sets the style of inter-processor communication that
-occurs each timestep as atom coordinates and other properties are
-exchanged between neighboring processors and stored as properties of
-ghost atoms.
+This command sets parameters that affect the inter-processor
+communication of atom information that occurs each timestep as
+coordinates and other properties are exchanged between neighboring
+processors and stored as properties of ghost atoms.
 
-The default style is {single} which means each processor acquires
+IMPORTANT NOTE: These options apply to the currently defined comm
+style.  When you specify a "comm_style"_comm_style.html command, all
+communication settings are restored to their default values, including
+those previously reset by a comm_modify command.  Thus if your input
+script specifies a comm_style command, you should use the comm_modify
+command after it.
+
+The {mode} keyword determines whether a single cutoff distance or
+multiple cutoff distances are used to determine which atoms to communicate.
+
+The default mode is {single} which means each processor acquires
 information for ghost atoms that are within a single distance from its
 sub-domain.  The distance is the maximum of the neighbor cutoff for
 all atom type pairs.
 
 For many systems this is an efficient algorithm, but for systems with
-widely varying cutoffs for different type pairs, the {multi} style can
+widely varying cutoffs for different type pairs, the {multi} mode can
 be faster.  In this case, each atom type is assigned its own distance
 cutoff for communication purposes, and fewer atoms will be
 communicated.  See the "neighbor multi"_neighbor.html command for a
 neighbor list construction option that may also be beneficial for
 simulations of this kind.
 
-The {cutoff} option allows you to set a ghost cutoff distance, which
+The {cutoff} keyword allows you to set a ghost cutoff distance, which
 is the distance from the borders of a processor's sub-domain at which
 ghost atoms are acquired from other processors.  By default the ghost
 cutoff = neighbor cutoff = pairwise force cutoff + neighbor skin.  See
@@ -98,14 +108,14 @@ will typically lead to bad dynamics (i.e. the bond length is now the
 simulation box length).  To detect if this is happening, see the
 "neigh_modify cluster"_neigh_modify.html command.
 
-The {group} option will limit communication to atoms in the specified
+The {group} keyword will limit communication to atoms in the specified
 group.  This can be useful for models where no ghost atoms are needed
 for some kinds of particles.  All atoms (not just those in the
 specified group) will still migrate to new processors as they move.
 The group specified with this option must also be specified via the
 "atom_modify first"_atom_modify.html command.
 
-The {vel} option enables velocity information to be communicated with
+The {vel} keyword enables velocity information to be communicated with
 ghost particles.  Depending on the "atom_style"_atom_style.html,
 velocity info includes the translational velocity, angular velocity,
 and angular momentum of a particle.  If the {vel} option is set to
@@ -124,10 +134,10 @@ that boundary (e.g. due to dilation or shear).
 
 [Related commands:]
 
-"neighbor"_neighbor.html
+"comm_style"_comm_style.html, "neighbor"_neighbor.html
 
 [Default:]
 
-The default settings are style = single, group = all, cutoff = 0.0,
-vel = no.  The cutoff default of 0.0 means that ghost cutoff =
-neighbor cutoff = pairwise force cutoff + neighbor skin.
+The option defaults are mode = single, group = all, cutoff = 0.0, vel =
+no.  The cutoff default of 0.0 means that ghost cutoff = neighbor
+cutoff = pairwise force cutoff + neighbor skin.
diff --git a/doc/comm_style.html b/doc/comm_style.html
new file mode 100644
index 0000000000..45998dcd4b
--- /dev/null
+++ b/doc/comm_style.html
@@ -0,0 +1,70 @@
+<HTML>
+<CENTER><A HREF = "http://lammps.sandia.gov">LAMMPS WWW Site</A> - <A HREF = "Manual.html">LAMMPS Documentation</A> - <A HREF = "Section_commands.html#comm">LAMMPS Commands</A> 
+</CENTER>
+
+
+
+
+
+
+<HR>
+
+<H3>comm_style command 
+</H3>
+<P><B>Syntax:</B>
+</P>
+<PRE>comm_style style 
+</PRE>
+<UL><LI>style = <I>brick</I> or <I>tiled</I> 
+</UL>
+<P><B>Examples:</B>
+</P>
+<PRE>comm_style brick
+comm_style tiled 
+</PRE>
+<P><B>Description:</B>
+</P>
+<P>This command sets the style of inter-processor communication of atom
+information that occurs each timestep as coordinates and other
+properties are exchanged between neighboring processors and stored as
+properties of ghost atoms.
+</P>
+<P>IMPORTANT NOTE: The <I>tiled</I> style is not yet implemented.
+</P>
+<P>For the default <I>brick</I> style, the domain decomposition used by LAMMPS
+to partition the simulation box must be a regular 3d grid of bricks,
+one per processor.  Each processor communicates with its 6 Cartesian
+neighbors in the grid to acquire information for nearby atoms.
+</P>
+<P>For the <I>tiled</I> style, a more general domain decomposition can be
+used, as triggered by the <A HREF = "balance.html">balance</A> or <A HREF = "fix_balance.html">fix
+balance</A> commands.  The simulation box can be
+partitioned into non-overlapping rectangular-shaped "tiles" of varying
+sizes and shapes.  Again there is one tile per processor.  To acquire
+information for nearby atoms, communication must now be done with a
+more complex pattern of neighboring processors.
+</P>
+<P>Note that this command does not actually define a partitioning of the
+simulation box (a domain decomposition), rather it determines what
+kinds of decompositions are allowed and the pattern of communication
+used to enable the decomposition.  A decomposition is created when the
+simulation box is first created, via the <A HREF = "create_box.html">create_box</A>
+or <A HREF = "read_data.html">read_data</A> or <A HREF = "read_restart.html">read_restart</A>
+commands.  For both the <I>brick</I> and <I>tiled</I> styles, the initial
+decomposition will be the same, as described by the
+<A HREF = "create_box.html">create_box</A> and <A HREF = "processors.html">processors</A>
+commands.  The decomposition can be changed via the
+<A HREF = "balance.html">balance</A> or <A HREF = "fix_balance.html">fix balance</A> commands.
+</P>
+<P><B>Restrictions:</B> none
+</P>
+<P><B>Related commands:</B>
+</P>
+<P><A HREF = "comm_modify.html">comm_modify</A>, <A HREF = "processors.html">processors</A>,
+<A HREF = "balance.html">balance</A>, <A HREF = "fix_balance.html">fix balance</A>
+</P>
+<P><B>Default:</B>
+</P>
+<P>The default style is brick.
+</P>
+</HTML>
diff --git a/doc/comm_style.txt b/doc/comm_style.txt
new file mode 100644
index 0000000000..e62fe02a24
--- /dev/null
+++ b/doc/comm_style.txt
@@ -0,0 +1,65 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+comm_style command :h3
+
+[Syntax:]
+
+comm_style style :pre
+
+style = {brick} or {tiled} :ul
+
+[Examples:]
+
+comm_style brick
+comm_style tiled :pre
+
+[Description:]
+
+This command sets the style of inter-processor communication of atom
+information that occurs each timestep as coordinates and other
+properties are exchanged between neighboring processors and stored as
+properties of ghost atoms.
+
+IMPORTANT NOTE: The {tiled} style is not yet implemented.
+
+For the default {brick} style, the domain decomposition used by LAMMPS
+to partition the simulation box must be a regular 3d grid of bricks,
+one per processor.  Each processor communicates with its 6 Cartesian
+neighbors in the grid to acquire information for nearby atoms.
+
+For the {tiled} style, a more general domain decomposition can be
+used, as triggered by the "balance"_balance.html or "fix
+balance"_fix_balance.html commands.  The simulation box can be
+partitioned into non-overlapping rectangular-shaped "tiles" of varying
+sizes and shapes.  Again there is one tile per processor.  To acquire
+information for nearby atoms, communication must now be done with a
+more complex pattern of neighboring processors.
+
+Note that this command does not actually define a partitioning of the
+simulation box (a domain decomposition), rather it determines what
+kinds of decompositions are allowed and the pattern of communication
+used to enable the decomposition.  A decomposition is created when the
+simulation box is first created, via the "create_box"_create_box.html
+or "read_data"_read_data.html or "read_restart"_read_restart.html
+commands.  For both the {brick} and {tiled} styles, the initial
+decomposition will be the same, as described by the
+"create_box"_create_box.html and "processors"_processors.html
+commands.  The decomposition can be changed via the
+"balance"_balance.html or "fix balance"_fix_balance.html commands.
+
+[Restrictions:] none
+
+[Related commands:]
+
+"comm_modify"_comm_modify.html, "processors"_processors.html,
+"balance"_balance.html, "fix balance"_fix_balance.html
+
+[Default:]
+
+The default style is brick.
diff --git a/doc/create_box.html b/doc/create_box.html
index 210f1c1de3..c7bfde29bd 100644
--- a/doc/create_box.html
+++ b/doc/create_box.html
@@ -44,7 +44,12 @@ create_box 2 mybox bond/types 2 extra/bond/per/atom 1
 </P>
 <P>This command creates a simulation box based on the specified region.
 Thus a <A HREF = "region.html">region</A> command must first be used to define a
-geometric domain.
+geometric domain.  It also partitions the simulation box into a
+regular 3d grid of rectangular bricks, one per processor, based on the
+number of processors being used and the settings of the
+<A HREF = "processors.html">processors</A> command.  The partitioning can later be
+changed by the <A HREF = "balance.html">balance</A> or <A HREF = "fix_balance.html">fix
+balance</A> commands.
 </P>
 <P>The argument N is the number of atom types that will be used in the
 simulation.
@@ -94,13 +99,14 @@ you should not make the lo/hi box dimensions (as defined in your
 of the atoms you eventually plan to create, e.g. via the
 <A HREF = "create_atoms.html">create_atoms</A> command.  For example, if your atoms
 extend from 0 to 50, you should not specify the box bounds as -10000
-and 10000. This is because LAMMPS uses the specified box size to
-layout the 3d grid of processors.  A huge (mostly empty) box will be
-sub-optimal for performance when using "fixed" boundary conditions
-(see the <A HREF = "boundary.html">boundary</A> command).  When using "shrink-wrap"
-boundary conditions (see the <A HREF = "boundary.html">boundary</A> command), a huge
-(mostly empty) box may cause a parallel simulation to lose atoms the
-first time that LAMMPS shrink-wraps the box around the atoms.
+and 10000. This is because, as described above, LAMMPS uses the
+specified box size to lay out the 3d grid of processors.  A huge
+(mostly empty) box will be sub-optimal for performance when using
+"fixed" boundary conditions (see the <A HREF = "boundary.html">boundary</A>
+command).  When using "shrink-wrap" boundary conditions (see the
+<A HREF = "boundary.html">boundary</A> command), a huge (mostly empty) box may cause
+a parallel simulation to lose atoms the first time that LAMMPS
+shrink-wraps the box around the atoms.
 </P>
 <HR>
 
diff --git a/doc/create_box.txt b/doc/create_box.txt
index 19300566b9..8d4dbdf906 100644
--- a/doc/create_box.txt
+++ b/doc/create_box.txt
@@ -36,7 +36,12 @@ create_box 2 mybox bond/types 2 extra/bond/per/atom 1 :pre
 
 This command creates a simulation box based on the specified region.
 Thus a "region"_region.html command must first be used to define a
-geometric domain.
+geometric domain.  It also partitions the simulation box into a
+regular 3d grid of rectangular bricks, one per processor, based on the
+number of processors being used and the settings of the
+"processors"_processors.html command.  The partitioning can later be
+changed by the "balance"_balance.html or "fix
+balance"_fix_balance.html commands.
 
 The argument N is the number of atom types that will be used in the
 simulation.
@@ -86,13 +91,14 @@ you should not make the lo/hi box dimensions (as defined in your
 of the atoms you eventually plan to create, e.g. via the
 "create_atoms"_create_atoms.html command.  For example, if your atoms
 extend from 0 to 50, you should not specify the box bounds as -10000
-and 10000. This is because LAMMPS uses the specified box size to
-layout the 3d grid of processors.  A huge (mostly empty) box will be
-sub-optimal for performance when using "fixed" boundary conditions
-(see the "boundary"_boundary.html command).  When using "shrink-wrap"
-boundary conditions (see the "boundary"_boundary.html command), a huge
-(mostly empty) box may cause a parallel simulation to lose atoms the
-first time that LAMMPS shrink-wraps the box around the atoms.
+and 10000. This is because, as described above, LAMMPS uses the
+specified box size to lay out the 3d grid of processors.  A huge
+(mostly empty) box will be sub-optimal for performance when using
+"fixed" boundary conditions (see the "boundary"_boundary.html
+command).  When using "shrink-wrap" boundary conditions (see the
+"boundary"_boundary.html command), a huge (mostly empty) box may cause
+a parallel simulation to lose atoms the first time that LAMMPS
+shrink-wraps the box around the atoms.
 
 :line
 
diff --git a/doc/fix_balance.html b/doc/fix_balance.html
index fafd7607aa..febb14f59e 100644
--- a/doc/fix_balance.html
+++ b/doc/fix_balance.html
@@ -13,7 +13,7 @@
 </H3>
 <P><B>Syntax:</B>
 </P>
-<PRE>fix ID group-ID balance Nfreq dimstr Niter thresh keyword value ... 
+<PRE>fix ID group-ID balance Nfreq thresh style args keyword value ... 
 </PRE>
 <UL><LI>ID, group-ID are documented in <A HREF = "fix.html">fix</A> command 
 
@@ -21,76 +21,130 @@
 
 <LI>Nfreq = perform dynamic load balancing every this many steps 
 
-<LI>dimstr = sequence of letters containing "x" or "y" or "z", each not more than once 
+<LI>thresh = imbalance threshold that must be exceeded to perform a re-balance 
 
-<LI>Niter = # of times to iterate within each dimension of dimstr sequence 
+<LI>style = <I>shift</I> or <I>rcb</I> 
 
-<LI>thresh = stop balancing when this imbalance threshhold is reached 
+<PRE>  shift args = dimstr Niter stopthresh
+    dimstr = sequence of letters containing "x" or "y" or "z", each not more than once
+    Niter = # of times to iterate within each dimension of dimstr sequence
+    stopthresh = stop balancing when this imbalance threshold is reached
+  rcb args = none 
+</PRE>
+<LI>zero or more keyword/value pairs may be appended 
 
-<LI>zero or more keyword/arg pairs may be appended 
-</UL>
 <LI>keyword = <I>out</I> 
 
-<PRE> <I>out</I> arg = filename
-   filename = output file to write each processor's sub-domain to 
+<PRE>  <I>out</I> value = filename
+    filename = write each processor's sub-domain to a file, at each re-balancing 
 </PRE>
 
 </UL>
 <P><B>Examples:</B>
 </P>
-<PRE>fix 2 all balance 1000 x 10 1.05
-fix 2 all balance 0 xy 20 1.1 out tmp.balance 
+<PRE>fix 2 all balance 1000 1.05 shift x 10 1.05
+fix 2 all balance 100 0.9 shift xy 20 1.1 out tmp.balance
+fix 2 all balance 1000 1.1 rcb 
 </PRE>
 <P><B>Description:</B>
 </P>
-<P>This command adjusts the size of processor sub-domains within the
-simulation box dynamically as a simulation runs, to attempt to balance
-the number of particles and thus the computational cost (load) evenly
-across processors. The load balancing is "dynamic" in the sense that
+<P>This command adjusts the size and shape of processor sub-domains
+within the simulation box, to attempt to balance the number of
+particles and thus the computational cost (load) evenly across
+processors.  The load balancing is "dynamic" in the sense that
 rebalancing is performed periodically during the simulation.  To
-perform "static" balancing, before of between runs, see the
+perform "static" balancing, before or between runs, see the
 <A HREF = "balance.html">balance</A> command.
 </P>
-<P>Load-balancing is only useful if the particles in the simulation box
-have a spatially-varying density distribution.  E.g. a model of a
-vapor/liquid interface, or a solid with an irregular-shaped geometry
-containing void regions. In this case, the LAMMPS default of dividing
-the simulation box volume into a regular-spaced grid of processor
-sub-domain, with one equal-volume sub-domain per procesor, may assign
-very different numbers of particles per processor. This can lead to
-poor performance in a scalability sense, when the simulation is run in
+<P>Load-balancing is typically only useful if the particles in the
+simulation box have a spatially-varying density distribution.  E.g. a
+model of a vapor/liquid interface, or a solid with an irregular-shaped
+geometry containing void regions.  In this case, the LAMMPS default of
+dividing the simulation box volume into a regular-spaced grid of 3d
+bricks, with one equal-volume sub-domain per processor, may assign very
+different numbers of particles per processor.  This can lead to poor
+performance in a scalability sense, when the simulation is run in
 parallel.
 </P>
-<P>Note that the <A HREF = "processors.html">processors</A> command gives you some
-control over how the box volume is split across
-processors. Specifically, for a Px by Py by Pz grid of processors, it
-lets you choose Px, Py, and Pz, subject to the constraint that Px * Py
-* Pz = P, the total number of processors. This can be sufficient to
-achieve good load-balance for some models on some processor
-counts. However, all the processor sub-domains will still be the same
-shape and have the same volume.
+<P>Note that the <A HREF = "processors.html">processors</A> command allows some control
+over how the box volume is split across processors.  Specifically, for
+a Px by Py by Pz grid of processors, it allows choice of Px, Py, and
+Pz, subject to the constraint that Px * Py * Pz = P, the total number
+of processors.  This is sufficient to achieve good load-balance for
+many models on many processor counts.  However, all the processor
+sub-domains will still have the same shape and same volume.
 </P>
-<P>This command does not alter the topology of the Px by Py by Pz grid or
-processors. But it shifts the cutting planes between processors (in
-3d, or lines in 2d), which adjusts the volume (area in 2d) assigned to
-each processor, as in the following 2d diagram. The left diagram is
-the default partitioning of the simulation box across processors (one
-sub-box for each of 16 processors); the right diagram is after
-balancing.
+<P>On a particular timestep, a load-balancing operation is only performed
+if the current "imbalance factor" in particles owned by each processor
+exceeds the specified <I>thresh</I> parameter.  This factor is defined as
+the maximum number of particles owned by any processor, divided by the
+average number of particles per processor.  Thus an imbalance factor
+of 1.0 is perfect balance.  For 10000 particles running on 10
+processors, if the most heavily loaded processor has 1200 particles,
+then the factor is 1.2, meaning there is a 20% imbalance.  Note that
+re-balances can be forced even if the current balance is perfect (1.0)
+by specifying a <I>thresh</I> < 1.0.
+</P>
+<P>IMPORTANT NOTE: This command attempts to minimize the imbalance
+factor, as defined above.  But depending on the method a perfect
+balance (1.0) may not be achieved.  For example, "grid" methods
+(defined below) that create a logical 3d grid cannot achieve perfect
+balance for many irregular distributions of particles.  Likewise, if a
+portion of the system is a perfect lattice, e.g. the initial system is
+generated by the <A HREF = "create_atoms.html">create_atoms</A> command, then "grid"
+methods may be unable to achieve exact balance.  This is because
+entire lattice planes will be owned or not owned by a single
+processor.
+</P>
+<P>IMPORTANT NOTE: Computational cost is not strictly proportional to
+particle count, and changing the relative size and shape of processor
+sub-domains may lead to additional computational and communication
+overheads, e.g. in the PPPM solver used via the
+<A HREF = "kspace_style.html">kspace_style</A> command.  Thus you should benchmark
+the run times of a simulation before and after balancing.
+</P>
+<HR>
+
+<P>The method used to perform a load balance is specified by one of the
+listed styles, which are described in detail below.  There are 2 kinds
+of styles.
+</P>
+<P>The <I>shift</I> style is a "grid" method which produces a logical 3d grid
+of processors.  It operates by changing the cutting planes (or lines)
+between processors in 3d (or 2d), to adjust the volume (area in 2d)
+assigned to each processor, as in the following 2d diagram.  The left
+diagram is the default partitioning of the simulation box across
+processors (one sub-box for each of 16 processors); the right diagram
+is after balancing.
 </P>
 <CENTER><IMG SRC = "JPG/balance.jpg">
 </CENTER>
-<P>IMPORTANT NOTE: This command attempts to minimize the imbalance
-factor, as defined above.  But because of the topology constraint that
-only the cutting planes (lines) between processors are moved, there
-are many irregular distributions of particles, where this factor
-cannot be shrunk to 1.0, particuarly in 3d.  Also, computational cost
-is not strictly proportional to particle count, and changing the
-relative size and shape of processor sub-domains may lead to
-additional computational and communication overheads, e.g. in the PPPM
-solver used via the <A HREF = "kspace_style.html">kspace_style</A> command.  Thus
-you should benchmark the run times of your simulation with and without
-balancing.
+<P>The <I>rcb</I> style is a "tiling" method which does not produce a logical
+3d grid of processors.  Rather it tiles the simulation domain with
+rectangular sub-boxes of varying size and shape in an irregular
+fashion so as to have equal numbers of particles in each sub-box, as
+in the following 2d diagram.  Again the left diagram is the default
+partitioning of the simulation box across processors (one sub-box for
+each of 16 processors); the right diagram is after balancing.
+</P>
+<P>NOTE: Need a diagram of RCB partitioning.
+</P>
+<P>The "grid" methods can be used with either of the
+<A HREF = "comm_style.html">comm_style</A> command options, <I>brick</I> or <I>tiled</I>.  The
+"tiling" methods can only be used with <A HREF = "comm_style.html">comm_style
+tiled</A>.
+</P>
+<P>When a "grid" method is specified, the current domain partitioning can
+be either a logical 3d grid or a tiled partitioning.  In the former
+case, the current logical 3d grid is used as a starting point and
+changes are made to improve the imbalance factor.  In the latter case,
+the tiled partitioning is discarded and a logical 3d grid is created
+with uniform spacing in all dimensions.  This becomes the starting
+point for the balancing operation.
+</P>
+<P>When a "tiling" method is specified, the current domain partitioning
+("grid" or "tiled") is ignored, and a new partitioning is computed
+from scratch.
 </P>
 <HR>
 
@@ -103,8 +157,8 @@ particles.
 </P>
 <P>The <I>Nfreq</I> setting determines how often a rebalance is performed.  If
 <I>Nfreq</I> > 0, then rebalancing will occur every <I>Nfreq</I> steps.  Each
-time a rebalance occurs, a reneighboring is triggered, so you should
-not make <I>Nfreq</I> too small.  If <I>Nfreq</I> = 0, then rebalancing will be
+time a rebalance occurs, a reneighboring is triggered, so <I>Nfreq</I>
+should not be too small.  If <I>Nfreq</I> = 0, then rebalancing will be
 done every time reneighboring normally occurs, as determined by the
 the <A HREF = "neighbor.html">neighbor</A> and <A HREF = "neigh_modify.html">neigh_modify</A>
 command settings.
@@ -112,6 +166,12 @@ command settings.
 <P>On rebalance steps, rebalancing will only be attempted if the current
 imbalance factor, as defined above, exceeds the <I>thresh</I> setting.
 </P>
+<HR>
+
+<P>The <I>shift</I> style invokes a "grid" method for balancing, as described
+above.  It changes the positions of cutting planes between processors
+in an iterative fashion, seeking to reduce the imbalance factor.
+</P>
 <P>The <I>dimstr</I> argument is a string of characters, each of which must be
 an "x" or "y" or "z".  Eacn character can appear zero or one time,
 since there is no advantage to balancing on a dimension more than
@@ -122,61 +182,61 @@ to be a density variation in the particles.
 dimensions listed in <I>dimstr</I>, one dimension at a time.  For a single
 dimension, the balancing operation (described below) is iterated on up
 to <I>Niter</I> times.  After each dimension finishes, the imbalance factor
-is re-computed, and the balancing operation halts if the <I>thresh</I>
+is re-computed, and the balancing operation halts if the <I>stopthresh</I>
 criterion is met.
 </P>
 <P>A rebalance operation in a single dimension is performed using a
 density-dependent recursive multisectioning algorithm, where the
 position of each cutting plane (line in 2d) in the dimension is
 adjusted independently.  This is similar to a recursive bisectioning
-(RCB) for a single value, except that the bounds used for each
-bisectioning take advantage of information from neighboring cuts if
-possible, as well as counts of particles at the bounds on either side
-of each cuts, which themselves were cuts in previous iterations.  The
-latter is used to infer a density of pariticles near each of the
-current cuts.  At each iteration, the count of particles on either
-side of each plane is tallied.  If the counts do not match the target
-value for the plane, the position of the cut is adjusted based on the
-local density.  The low and high bounds are adjusted on each
-iteration, using new count information, so that they become closer
-together over time.  Thus as the recustion progresses, the count of
-particles on either side of the plane gets closer to the target value.
+for a single value, except that the bounds used for each bisectioning
+take advantage of information from neighboring cuts if possible, as
+well as counts of particles at the bounds on either side of each cut,
+which themselves were cuts in previous iterations.  The latter is used
+to infer a density of particles near each of the current cuts.  At
+each iteration, the count of particles on either side of each plane is
+tallied.  If the counts do not match the target value for the plane,
+the position of the cut is adjusted based on the local density.  The
+low and high bounds are adjusted on each iteration, using new count
+information, so that they become closer together over time.  Thus as
+the recursion progresses, the count of particles on either side of the
+plane gets closer to the target value.
 </P>
 <P>The density-dependent part of this algorithm is often an advantage
 when you rebalance a system that is already nearly balanced.  It
 typically converges more quickly than the geometric bisectioning
 algorithm used by the <A HREF = "balance.html">balance</A> command.  However, if can
-be a disadvants if you attempt to rebalance a system that is far from
-balanced, and converge more slowly.  In this case you probably want to
-use the <A HREF = "balance.html">balance</A> command before starting a run, so that
-you begin the run with a balanced system.
+be a disadvantage if you attempt to rebalance a system that is far
+from balanced, and converge more slowly.  In this case you probably
+want to use the <A HREF = "balance.html">balance</A> command before starting a run,
+so that you begin the run with a balanced system.
 </P>
 <P>Once the rebalancing is complete and final processor sub-domains
 assigned, particles migrate to their new owning processor as part of
 the normal reneighboring procedure.
 </P>
-<P>IMPORTANT NOTE: At each rebalance operation, the RCB operation for
-each cutting plane (line in 2d) typcially starts with low and high
-bounds separated by the extent of a processor's sub-domain in one
-dimension.  The size of this bracketing region shrinks based on the
-local density, as described above, which should typically be 1/2 or
-more every iteration.  Thus if <I>Niter</I> is specified as 10, the cutting
-plane will typically be positioned to better than 1 part in 1000
-accuracy (relative to the perfect target position).  For <I>Niter</I> = 20,
-it will be accurate to better than 1 part in a million.  Thus there is
-no need to set <I>Niter</I> to a large value.  This is especially true if
-you are rebalancing often enough that each time you expect only an
-incremental adjustement in the cutting planes is necessary.  LAMMPS
-will check if the threshold accuracy is reached (in a dimension) is
-less iterations than <I>Niter</I> and exit early.
+<P>IMPORTANT NOTE: At each rebalance operation, the bisectioning for each
+cutting plane (line in 2d) typically starts with low and high bounds
+separated by the extent of a processor's sub-domain in one dimension.
+The size of this bracketing region shrinks based on the local density,
+as described above, which should typically be 1/2 or more every
+iteration.  Thus if <I>Niter</I> is specified as 10, the cutting plane will
+typically be positioned to better than 1 part in 1000 accuracy
+(relative to the perfect target position).  For <I>Niter</I> = 20, it will
+be accurate to better than 1 part in a million.  Thus there is no need
+to set <I>Niter</I> to a large value.  This is especially true if you are
+rebalancing often enough that each time you expect only an incremental
+adjustment in the cutting planes is necessary.  LAMMPS will check if
+the threshold accuracy is reached (in a dimension) in fewer iterations
+than <I>Niter</I> and exit early.
 </P>
-<P>IMPORTANT NOTE: If a portion of your system is a perfect lattice,
-e.g. a frozen substrate, then the balancer may be unable to achieve
-exact balance.  I.e. entire lattice planes will be owned or not owned
-by a single processor.  So you you should not expect to achieve
-perfect balance in this case.  Nor will it be helpful to use a large
-value for <I>Niter</I>, since it will simply cause the balancer to iterate
-until <I>Niter</I> is reached, without improving the imbalance factor.
+<HR>
+
+<P>The <I>rcb</I> style invokes a "tiling" method for balancing, as described
+above.  It performs a recursive coordinate bisectioning (RCB) of the
+simulation domain.
+</P>
+<P>Need further description of RCB.
 </P>
 <HR>
 
@@ -262,7 +322,10 @@ minimization</A>.
 </P>
 <HR>
 
-<P><B>Restrictions:</B> none
+<P><B>Restrictions:</B>
+</P>
+<P>For 2d simulations, a "z" cannot appear in <I>dimstr</I> for the <I>shift</I>
+style.
 </P>
 <P><B>Related commands:</B>
 </P>
diff --git a/doc/fix_balance.txt b/doc/fix_balance.txt
index ae5f076f72..c39c9f3cff 100644
--- a/doc/fix_balance.txt
+++ b/doc/fix_balance.txt
@@ -10,75 +10,129 @@ fix balance command :h3
 
 [Syntax:]
 
-fix ID group-ID balance Nfreq dimstr Niter thresh keyword value ... :pre
+fix ID group-ID balance Nfreq thresh style args keyword value ... :pre
 
 ID, group-ID are documented in "fix"_fix.html command :ulb,l
 balance = style name of this fix command :l
 Nfreq = perform dynamic load balancing every this many steps :l
-dimstr = sequence of letters containing "x" or "y" or "z", each not more than once :l
-Niter = # of times to iterate within each dimension of dimstr sequence :l
-thresh = stop balancing when this imbalance threshhold is reached :l
-zero or more keyword/arg pairs may be appended :ule,l
+thresh = imbalance threshold that must be exceeded to perform a re-balance :l
+style = {shift} or {rcb} :l
+  shift args = dimstr Niter stopthresh
+    dimstr = sequence of letters containing "x" or "y" or "z", each not more than once
+    Niter = # of times to iterate within each dimension of dimstr sequence
+    stopthresh = stop balancing when this imbalance threshold is reached
+  rcb args = none :pre
+zero or more keyword/value pairs may be appended :l
 keyword = {out} :l
- {out} arg = filename
-   filename = output file to write each processor's sub-domain to :pre
+  {out} value = filename
+    filename = write each processor's sub-domain to a file, at each re-balancing :pre
 :ule
 
 [Examples:]
 
-fix 2 all balance 1000 x 10 1.05
-fix 2 all balance 0 xy 20 1.1 out tmp.balance :pre
+fix 2 all balance 1000 1.05 shift x 10 1.05
+fix 2 all balance 100 0.9 shift xy 20 1.1 out tmp.balance
+fix 2 all balance 1000 1.1 rcb :pre
 
 [Description:]
 
-This command adjusts the size of processor sub-domains within the
-simulation box dynamically as a simulation runs, to attempt to balance
-the number of particles and thus the computational cost (load) evenly
-across processors. The load balancing is "dynamic" in the sense that
+This command adjusts the size and shape of processor sub-domains
+within the simulation box, to attempt to balance the number of
+particles and thus the computational cost (load) evenly across
+processors.  The load balancing is "dynamic" in the sense that
 rebalancing is performed periodically during the simulation.  To
-perform "static" balancing, before of between runs, see the
+perform "static" balancing, before or between runs, see the
 "balance"_balance.html command.
 
-Load-balancing is only useful if the particles in the simulation box
-have a spatially-varying density distribution.  E.g. a model of a
-vapor/liquid interface, or a solid with an irregular-shaped geometry
-containing void regions. In this case, the LAMMPS default of dividing
-the simulation box volume into a regular-spaced grid of processor
-sub-domain, with one equal-volume sub-domain per procesor, may assign
-very different numbers of particles per processor. This can lead to
-poor performance in a scalability sense, when the simulation is run in
+Load-balancing is typically only useful if the particles in the
+simulation box have a spatially-varying density distribution.  E.g. a
+model of a vapor/liquid interface, or a solid with an irregular-shaped
+geometry containing void regions.  In this case, the LAMMPS default of
+dividing the simulation box volume into a regular-spaced grid of 3d
+bricks, with one equal-volume sub-domain per processor, may assign very
+different numbers of particles per processor.  This can lead to poor
+performance in a scalability sense, when the simulation is run in
 parallel.
 
-Note that the "processors"_processors.html command gives you some
-control over how the box volume is split across
-processors. Specifically, for a Px by Py by Pz grid of processors, it
-lets you choose Px, Py, and Pz, subject to the constraint that Px * Py
-* Pz = P, the total number of processors. This can be sufficient to
-achieve good load-balance for some models on some processor
-counts. However, all the processor sub-domains will still be the same
-shape and have the same volume.
+Note that the "processors"_processors.html command allows some control
+over how the box volume is split across processors.  Specifically, for
+a Px by Py by Pz grid of processors, it allows choice of Px, Py, and
+Pz, subject to the constraint that Px * Py * Pz = P, the total number
+of processors.  This is sufficient to achieve good load-balance for
+many models on many processor counts.  However, all the processor
+sub-domains will still have the same shape and same volume.
 
-This command does not alter the topology of the Px by Py by Pz grid or
-processors. But it shifts the cutting planes between processors (in
-3d, or lines in 2d), which adjusts the volume (area in 2d) assigned to
-each processor, as in the following 2d diagram. The left diagram is
-the default partitioning of the simulation box across processors (one
-sub-box for each of 16 processors); the right diagram is after
-balancing.
+On a particular timestep, a load-balancing operation is only performed
+if the current "imbalance factor" in particles owned by each processor
+exceeds the specified {thresh} parameter.  This factor is defined as
+the maximum number of particles owned by any processor, divided by the
+average number of particles per processor.  Thus an imbalance factor
+of 1.0 is perfect balance.  For 10000 particles running on 10
+processors, if the most heavily loaded processor has 1200 particles,
+then the factor is 1.2, meaning there is a 20% imbalance.  Note that
+re-balances can be forced even if the current balance is perfect (1.0)
+by specifying a {thresh} < 1.0.
+
+IMPORTANT NOTE: This command attempts to minimize the imbalance
+factor, as defined above.  But depending on the method a perfect
+balance (1.0) may not be achieved.  For example, "grid" methods
+(defined below) that create a logical 3d grid cannot achieve perfect
+balance for many irregular distributions of particles.  Likewise, if a
+portion of the system is a perfect lattice, e.g. the initial system is
+generated by the "create_atoms"_create_atoms.html command, then "grid"
+methods may be unable to achieve exact balance.  This is because
+entire lattice planes will be owned or not owned by a single
+processor.
+
+IMPORTANT NOTE: Computational cost is not strictly proportional to
+particle count, and changing the relative size and shape of processor
+sub-domains may lead to additional computational and communication
+overheads, e.g. in the PPPM solver used via the
+"kspace_style"_kspace_style.html command.  Thus you should benchmark
+the run times of a simulation before and after balancing.
+
+:line
+
+The method used to perform a load balance is specified by one of the
+listed styles, which are described in detail below.  There are 2 kinds
+of styles.
+
+The {shift} style is a "grid" method which produces a logical 3d grid
+of processors.  It operates by changing the cutting planes (or lines)
+between processors in 3d (or 2d), to adjust the volume (area in 2d)
+assigned to each processor, as in the following 2d diagram.  The left
+diagram is the default partitioning of the simulation box across
+processors (one sub-box for each of 16 processors); the right diagram
+is after balancing.
 
 :c,image(JPG/balance.jpg)
 
-IMPORTANT NOTE: This command attempts to minimize the imbalance
-factor, as defined above.  But because of the topology constraint that
-only the cutting planes (lines) between processors are moved, there
-are many irregular distributions of particles, where this factor
-cannot be shrunk to 1.0, particuarly in 3d.  Also, computational cost
-is not strictly proportional to particle count, and changing the
-relative size and shape of processor sub-domains may lead to
-additional computational and communication overheads, e.g. in the PPPM
-solver used via the "kspace_style"_kspace_style.html command.  Thus
-you should benchmark the run times of your simulation with and without
-balancing.
+The {rcb} style is a "tiling" method which does not produce a logical
+3d grid of processors.  Rather it tiles the simulation domain with
+rectangular sub-boxes of varying size and shape in an irregular
+fashion so as to have equal numbers of particles in each sub-box, as
+in the following 2d diagram.  Again the left diagram is the default
+partitioning of the simulation box across processors (one sub-box for
+each of 16 processors); the right diagram is after balancing.
+
+NOTE: Need a diagram of RCB partitioning.
+
+The "grid" methods can be used with either of the
+"comm_style"_comm_style.html command options, {brick} or {tiled}.  The
+"tiling" methods can only be used with "comm_style
+tiled"_comm_style.html.
+
+When a "grid" method is specified, the current domain partitioning can
+be either a logical 3d grid or a tiled partitioning.  In the former
+case, the current logical 3d grid is used as a starting point and
+changes are made to improve the imbalance factor.  In the latter case,
+the tiled partitioning is discarded and a logical 3d grid is created
+with uniform spacing in all dimensions.  This becomes the starting
+point for the balancing operation.
+
+When a "tiling" method is specified, the current domain partitioning
+("grid" or "tiled") is ignored, and a new partitioning is computed
+from scratch.
 
 :line
 
@@ -91,8 +145,8 @@ particles.
 
 The {Nfreq} setting determines how often a rebalance is performed.  If
 {Nfreq} > 0, then rebalancing will occur every {Nfreq} steps.  Each
-time a rebalance occurs, a reneighboring is triggered, so you should
-not make {Nfreq} too small.  If {Nfreq} = 0, then rebalancing will be
+time a rebalance occurs, a reneighboring is triggered, so {Nfreq}
+should not be too small.  If {Nfreq} = 0, then rebalancing will be
 done every time reneighboring normally occurs, as determined by the
 the "neighbor"_neighbor.html and "neigh_modify"_neigh_modify.html
 command settings.
@@ -100,6 +154,12 @@ command settings.
 On rebalance steps, rebalancing will only be attempted if the current
 imbalance factor, as defined above, exceeds the {thresh} setting.
 
+:line
+
+The {shift} style invokes a "grid" method for balancing, as described
+above.  It changes the positions of cutting planes between processors
+in an iterative fashion, seeking to reduce the imbalance factor.
+
 The {dimstr} argument is a string of characters, each of which must be
 an "x" or "y" or "z".  Eacn character can appear zero or one time,
 since there is no advantage to balancing on a dimension more than
@@ -110,61 +170,61 @@ Balancing proceeds by adjusting the cutting planes in each of the
 dimensions listed in {dimstr}, one dimension at a time.  For a single
 dimension, the balancing operation (described below) is iterated on up
 to {Niter} times.  After each dimension finishes, the imbalance factor
-is re-computed, and the balancing operation halts if the {thresh}
+is re-computed, and the balancing operation halts if the {stopthresh}
 criterion is met.
 
 A rebalance operation in a single dimension is performed using a
 density-dependent recursive multisectioning algorithm, where the
 position of each cutting plane (line in 2d) in the dimension is
 adjusted independently.  This is similar to a recursive bisectioning
-(RCB) for a single value, except that the bounds used for each
-bisectioning take advantage of information from neighboring cuts if
-possible, as well as counts of particles at the bounds on either side
-of each cuts, which themselves were cuts in previous iterations.  The
-latter is used to infer a density of pariticles near each of the
-current cuts.  At each iteration, the count of particles on either
-side of each plane is tallied.  If the counts do not match the target
-value for the plane, the position of the cut is adjusted based on the
-local density.  The low and high bounds are adjusted on each
-iteration, using new count information, so that they become closer
-together over time.  Thus as the recustion progresses, the count of
-particles on either side of the plane gets closer to the target value.
+for a single value, except that the bounds used for each bisectioning
+take advantage of information from neighboring cuts if possible, as
+well as counts of particles at the bounds on either side of each cut,
+which themselves were cuts in previous iterations.  The latter is used
+to infer a density of particles near each of the current cuts.  At
+each iteration, the count of particles on either side of each plane is
+tallied.  If the counts do not match the target value for the plane,
+the position of the cut is adjusted based on the local density.  The
+low and high bounds are adjusted on each iteration, using new count
+information, so that they become closer together over time.  Thus as
+the recursion progresses, the count of particles on either side of the
+plane gets closer to the target value.
 
 The density-dependent part of this algorithm is often an advantage
 when you rebalance a system that is already nearly balanced.  It
 typically converges more quickly than the geometric bisectioning
 algorithm used by the "balance"_balance.html command.  However, if can
-be a disadvants if you attempt to rebalance a system that is far from
-balanced, and converge more slowly.  In this case you probably want to
-use the "balance"_balance.html command before starting a run, so that
-you begin the run with a balanced system.
+be a disadvantage if you attempt to rebalance a system that is far
+from balanced, and converge more slowly.  In this case you probably
+want to use the "balance"_balance.html command before starting a run,
+so that you begin the run with a balanced system.
 
 Once the rebalancing is complete and final processor sub-domains
 assigned, particles migrate to their new owning processor as part of
 the normal reneighboring procedure.
 
-IMPORTANT NOTE: At each rebalance operation, the RCB operation for
-each cutting plane (line in 2d) typcially starts with low and high
-bounds separated by the extent of a processor's sub-domain in one
-dimension.  The size of this bracketing region shrinks based on the
-local density, as described above, which should typically be 1/2 or
-more every iteration.  Thus if {Niter} is specified as 10, the cutting
-plane will typically be positioned to better than 1 part in 1000
-accuracy (relative to the perfect target position).  For {Niter} = 20,
-it will be accurate to better than 1 part in a million.  Thus there is
-no need to set {Niter} to a large value.  This is especially true if
-you are rebalancing often enough that each time you expect only an
-incremental adjustement in the cutting planes is necessary.  LAMMPS
-will check if the threshold accuracy is reached (in a dimension) is
-less iterations than {Niter} and exit early.
+IMPORTANT NOTE: At each rebalance operation, the bisectioning for each
+cutting plane (line in 2d) typically starts with low and high bounds
+separated by the extent of a processor's sub-domain in one dimension.
+The size of this bracketing region shrinks based on the local density,
+as described above, which should typically be 1/2 or more every
+iteration.  Thus if {Niter} is specified as 10, the cutting plane will
+typically be positioned to better than 1 part in 1000 accuracy
+(relative to the perfect target position).  For {Niter} = 20, it will
+be accurate to better than 1 part in a million.  Thus there is no need
+to set {Niter} to a large value.  This is especially true if you are
+rebalancing often enough that each time you expect only an incremental
+adjustment in the cutting planes is necessary.  LAMMPS will check if
+the threshold accuracy is reached (in a dimension) in fewer iterations
+than {Niter} and exit early.
 
-IMPORTANT NOTE: If a portion of your system is a perfect lattice,
-e.g. a frozen substrate, then the balancer may be unable to achieve
-exact balance.  I.e. entire lattice planes will be owned or not owned
-by a single processor.  So you you should not expect to achieve
-perfect balance in this case.  Nor will it be helpful to use a large
-value for {Niter}, since it will simply cause the balancer to iterate
-until {Niter} is reached, without improving the imbalance factor.
+:line
+
+The {rcb} style invokes a "tiling" method for balancing, as described
+above.  It performs a recursive coordinate bisectioning (RCB) of the
+simulation domain.
+
+Need further description of RCB.
 
 :line
 
@@ -250,7 +310,10 @@ minimization"_minimize.html.
 
 :line
 
-[Restrictions:] none
+[Restrictions:]
+
+For 2d simulations, a "z" cannot appear in {dimstr} for the {shift}
+style.
 
 [Related commands:]
 
diff --git a/doc/processors.html b/doc/processors.html
index 4180afea75..d17b90cbff 100644
--- a/doc/processors.html
+++ b/doc/processors.html
@@ -57,12 +57,12 @@ processors * * * part 1 2 multiple
 </PRE>
 <P><B>Description:</B>
 </P>
-<P>Specify how processors are mapped as a 3d logical grid to the global
-simulation box.  This involves 2 steps.  First if there are P
+<P>Specify how processors are mapped as a regular 3d grid to the global
+simulation box.  The mapping involves 2 steps.  First if there are P
 processors it means choosing a factorization P = Px by Py by Pz so
 that there are Px processors in the x dimension, and similarly for the
 y and z dimensions.  Second, the P processors are mapped to the
-logical 3d grid.  The arguments to this command control each of these
+regular 3d grid.  The arguments to this command control each of these
 2 steps.
 </P>
 <P>The Px, Py, Pz parameters affect the factorization.  Any of the 3
@@ -72,12 +72,11 @@ It will do this based on the size and shape of the global simulation
 box so as to minimize the surface-to-volume ratio of each processor's
 sub-domain.
 </P>
-<P>Since LAMMPS does not load-balance by changing the grid of 3d
-processors on-the-fly, choosing explicit values for Px or Py or Pz can
-be used to override the LAMMPS default if it is known to be
-sub-optimal for a particular problem.  E.g. a problem where the extent
-of atoms will change dramatically in a particular dimension over the
-course of the simulation.
+<P>Choosing explicit values for Px or Py or Pz can be used to override
+the default manner in which LAMMPS will create the regular 3d grid of
+processors, if it is known to be sub-optimal for a particular problem.
+E.g. a problem where the extent of atoms will change dramatically in a
+particular dimension over the course of the simulation.
 </P>
 <P>The product of Px, Py, Pz must equal P, the total # of processors
 LAMMPS is running on.  For a <A HREF = "dimension.html">2d simulation</A>, Pz must
@@ -101,6 +100,28 @@ different processor grids for different partitions, e.g.
 <PRE>partition yes 1 processors 4 4 4
 partition yes 2 processors 2 3 2 
 </PRE>
+<P>IMPORTANT NOTE: This command only affects the initial regular 3d grid
+created when the simulation box is first specified via a
+<A HREF = "create_box.html">create_box</A> or <A HREF = "read_data.html">read_data</A> or
+<A HREF = "read_restart.html">read_restart</A> command.  Or if the simulation box is
+re-created via the <A HREF = "replicate.html">replicate</A> command.  The same
+regular grid is initially created, regardless of which
+<A HREF = "comm_style.html">comm_style</A> command is in effect.
+</P>
+<P>If load-balancing is never invoked via the <A HREF = "balance.html">balance</A> or
+<A HREF = "fix_balance.html">fix balance</A> commands, then the initial regular grid
+will persist for all simulations.  If balancing is performed, some of
+the methods invoked by those commands retain the logical topology of
+the initial 3d grid, and the mapping of processors to the grid
+specified by the processors command.  However the grid spacings in
+different dimensions may change, so that processors own sub-domains of
+different sizes.  If the <A HREF = "comm_style.html">comm_style tiled</A> command is
+used, methods invoked by the balancing commands may discard the 3d
+grid of processors and tile the simulation domain with sub-domains of
+different sizes and shapes which no longer have a logical 3d
+connectivity.  If that occurs, all the information specified by the
+processors command is ignored.
+</P>
 <HR>
 
 <P>The <I>grid</I> keyword affects the factorization of P into Px,Py,Pz and it
@@ -144,7 +165,7 @@ access (NUMA) costs.  It also uses a different algorithm than the
 <I>twolevel</I> keyword for doing the two-level factorization of the
 simulation box into a 3d processor grid to minimize off-node
 communication, and it does its own MPI-based mapping of nodes and
-cores to the logical 3d grid.  Thus it may produce a different layout
+cores to the regular 3d grid.  Thus it may produce a different layout
 of the processors than the <I>twolevel</I> options.
 </P>
 <P>The <I>numa</I> style will give an error if the number of MPI processes is
@@ -239,11 +260,11 @@ and <I>Precv</I> must be integers from 1 to Np, where Np is the number of
 partitions you have defined via the <A HREF = "Section_start.html#start_7">-partition command-line
 switch</A>.
 </P>
-<P>A "dependency" means that the sending partition will create its 3d
-logical grid as Px by Py by Pz and after it has done this, it will
+<P>A "dependency" means that the sending partition will create its
+regular 3d grid as Px by Py by Pz and after it has done this, it will
 send the Px,Py,Pz values to the receiving partition.  The receiving
-partition will wait to receive these values before creating its own 3d
-logical grid and will use the sender's Px,Py,Pz values as a
+partition will wait to receive these values before creating its own
+regular 3d grid and will use the sender's Px,Py,Pz values as a
 constraint.  The nature of the constraint is determined by the
 <I>cstyle</I> argument.
 </P>
@@ -294,7 +315,7 @@ The universe and original IDs will only be different if you used the
 the processors differently than their rank in the original
 communicator LAMMPS was instantiated with.
 </P>
-<P>I,J,K are the indices of the processor in the 3d logical grid, each
+<P>I,J,K are the indices of the processor in the regular 3d grid, each
 from 1 to Nd, where Nd is the number of processors in that dimension
 of the grid.
 </P>
diff --git a/doc/processors.txt b/doc/processors.txt
index e1e410f0fc..7abc5faf4d 100644
--- a/doc/processors.txt
+++ b/doc/processors.txt
@@ -50,12 +50,12 @@ processors * * * part 1 2 multiple :pre
 
 [Description:]
 
-Specify how processors are mapped as a 3d logical grid to the global
-simulation box.  This involves 2 steps.  First if there are P
+Specify how processors are mapped as a regular 3d grid to the global
+simulation box.  The mapping involves 2 steps.  First if there are P
 processors it means choosing a factorization P = Px by Py by Pz so
 that there are Px processors in the x dimension, and similarly for the
 y and z dimensions.  Second, the P processors are mapped to the
-logical 3d grid.  The arguments to this command control each of these
+regular 3d grid.  The arguments to this command control each of these
 2 steps.
 
 The Px, Py, Pz parameters affect the factorization.  Any of the 3
@@ -65,12 +65,11 @@ It will do this based on the size and shape of the global simulation
 box so as to minimize the surface-to-volume ratio of each processor's
 sub-domain.
 
-Since LAMMPS does not load-balance by changing the grid of 3d
-processors on-the-fly, choosing explicit values for Px or Py or Pz can
-be used to override the LAMMPS default if it is known to be
-sub-optimal for a particular problem.  E.g. a problem where the extent
-of atoms will change dramatically in a particular dimension over the
-course of the simulation.
+Choosing explicit values for Px or Py or Pz can be used to override
+the default manner in which LAMMPS will create the regular 3d grid of
+processors, if it is known to be sub-optimal for a particular problem.
+E.g. a problem where the extent of atoms will change dramatically in a
+particular dimension over the course of the simulation.
 
 The product of Px, Py, Pz must equal P, the total # of processors
 LAMMPS is running on.  For a "2d simulation"_dimension.html, Pz must
@@ -94,6 +93,28 @@ different processor grids for different partitions, e.g.
 partition yes 1 processors 4 4 4
 partition yes 2 processors 2 3 2 :pre
 
+IMPORTANT NOTE: This command only affects the initial regular 3d grid
+created when the simulation box is first specified via a
+"create_box"_create_box.html or "read_data"_read_data.html or
+"read_restart"_read_restart.html command.  Or if the simulation box is
+re-created via the "replicate"_replicate.html command.  The same
+regular grid is initially created, regardless of which
+"comm_style"_comm_style.html command is in effect.
+
+If load-balancing is never invoked via the "balance"_balance.html or
+"fix balance"_fix_balance.html commands, then the initial regular grid
+will persist for all simulations.  If balancing is performed, some of
+the methods invoked by those commands retain the logical topology of
+the initial 3d grid, and the mapping of processors to the grid
+specified by the processors command.  However the grid spacings in
+different dimensions may change, so that processors own sub-domains of
+different sizes.  If the "comm_style tiled"_comm_style.html command is
+used, methods invoked by the balancing commands may discard the 3d
+grid of processors and tile the simulation domain with sub-domains of
+different sizes and shapes which no longer have a logical 3d
+connectivity.  If that occurs, all the information specified by the
+processors command is ignored.
+
 :line
 
 The {grid} keyword affects the factorization of P into Px,Py,Pz and it
@@ -137,7 +158,7 @@ access (NUMA) costs.  It also uses a different algorithm than the
 {twolevel} keyword for doing the two-level factorization of the
 simulation box into a 3d processor grid to minimize off-node
 communication, and it does its own MPI-based mapping of nodes and
-cores to the logical 3d grid.  Thus it may produce a different layout
+cores to the regular 3d grid.  Thus it may produce a different layout
 of the processors than the {twolevel} options.
 
 The {numa} style will give an error if the number of MPI processes is
@@ -232,11 +253,11 @@ and {Precv} must be integers from 1 to Np, where Np is the number of
 partitions you have defined via the "-partition command-line
 switch"_Section_start.html#start_7.
 
-A "dependency" means that the sending partition will create its 3d
-logical grid as Px by Py by Pz and after it has done this, it will
+A "dependency" means that the sending partition will create its
+regular 3d grid as Px by Py by Pz and after it has done this, it will
 send the Px,Py,Pz values to the receiving partition.  The receiving
-partition will wait to receive these values before creating its own 3d
-logical grid and will use the sender's Px,Py,Pz values as a
+partition will wait to receive these values before creating its own
+regular 3d grid and will use the sender's Px,Py,Pz values as a
 constraint.  The nature of the constraint is determined by the
 {cstyle} argument.
 
@@ -287,7 +308,7 @@ The universe and original IDs will only be different if you used the
 the processors differently than their rank in the original
 communicator LAMMPS was instantiated with.
 
-I,J,K are the indices of the processor in the 3d logical grid, each
+I,J,K are the indices of the processor in the regular 3d grid, each
 from 1 to Nd, where Nd is the number of processors in that dimension
 of the grid.
 
diff --git a/doc/read_data.html b/doc/read_data.html
index 8577c9f3b3..353c1b5857 100644
--- a/doc/read_data.html
+++ b/doc/read_data.html
@@ -120,7 +120,12 @@ is different than the default.
 </UL>
 <P>The initial simulation box size is determined by the lo/hi settings.
 In any dimension, the system may be periodic or non-periodic; see the
-<A HREF = "boundary.html">boundary</A> command.
+<A HREF = "boundary.html">boundary</A> command.  When the simulation box is created
+it is also partitioned into a regular 3d grid of rectangular bricks,
+one per processor, based on the number of processors being used and
+the settings of the <A HREF = "processors.html">processors</A> command.  The
+partitioning can later be changed by the <A HREF = "balance.html">balance</A> or
+<A HREF = "fix_balance.html">fix balance</A> commands.
 </P>
 <P>If the <I>xy xz yz</I> line does not appear, LAMMPS will set up an
 axis-aligned (orthogonal) simulation box.  If the line does appear,
diff --git a/doc/read_data.txt b/doc/read_data.txt
index 4ff5159bf8..b3d35d22cc 100644
--- a/doc/read_data.txt
+++ b/doc/read_data.txt
@@ -114,7 +114,12 @@ is different than the default.
 
 The initial simulation box size is determined by the lo/hi settings.
 In any dimension, the system may be periodic or non-periodic; see the
-"boundary"_boundary.html command.
+"boundary"_boundary.html command.  When the simulation box is created
+it is also partitioned into a regular 3d grid of rectangular bricks,
+one per processor, based on the number of processors being used and
+the settings of the "processors"_processors.html command.  The
+partitioning can later be changed by the "balance"_balance.html or
+"fix balance"_fix_balance.html commands.
 
 If the {xy xz yz} line does not appear, LAMMPS will set up an
 axis-aligned (orthogonal) simulation box.  If the line does appear,
diff --git a/doc/read_restart.html b/doc/read_restart.html
index 2966ba78e0..4835373854 100644
--- a/doc/read_restart.html
+++ b/doc/read_restart.html
@@ -30,7 +30,15 @@ read_restart poly.*.%
 </P>
 <P>Read in a previously saved simulation from a restart file.  This
 allows continuation of a previous run.  Information about what is
-stored in a restart file is given below.
+stored in a restart file is given below.  Basically this operation
+will re-create the simulation box with all its atoms and their
+attributes, at the point in time it was written to the restart file by
+a previous simulation.  The simulation box will be partitioned into a
+regular 3d grid of rectangular bricks, one per processor, based on the
+number of processors in the current simulation and the settings of the
+<A HREF = "processors.html">processors</A> command.  The partitioning can later be
+changed by the <A HREF = "balance.html">balance</A> or <A HREF = "fix_balance.html">fix
+balance</A> commands.
 </P>
 <P>Restart files are saved in binary format to enable exact restarts,
 meaning that the trajectories of a restarted run will precisely match
diff --git a/doc/read_restart.txt b/doc/read_restart.txt
index 7658dd42f6..d08fce975b 100644
--- a/doc/read_restart.txt
+++ b/doc/read_restart.txt
@@ -27,7 +27,15 @@ read_restart poly.*.% :pre
 
 Read in a previously saved simulation from a restart file.  This
 allows continuation of a previous run.  Information about what is
-stored in a restart file is given below.
+stored in a restart file is given below.  Basically this operation
+will re-create the simulation box with all its atoms and their
+attributes, at the point in time it was written to the restart file by
+a previous simulation.  The simulation box will be partitioned into a
+regular 3d grid of rectangular bricks, one per processor, based on the
+number of processors in the current simulation and the settings of the
+"processors"_processors.html command.  The partitioning can later be
+changed by the "balance"_balance.html or "fix
+balance"_fix_balance.html commands.
 
 Restart files are saved in binary format to enable exact restarts,
 meaning that the trajectories of a restarted run will precisely match
diff --git a/doc/replicate.html b/doc/replicate.html
index d494e9d225..6b1bac3821 100644
--- a/doc/replicate.html
+++ b/doc/replicate.html
@@ -27,7 +27,12 @@
 For example, replication factors of 2,2,2 will create a simulation
 with 8x as many atoms by doubling the simulation domain in each
 dimension.  A replication factor of 1 in a dimension leaves the
-simulation domain unchanged.
+simulation domain unchanged.  When the new simulation box is created
+it is also partitioned into a regular 3d grid of rectangular bricks,
+one per processor, based on the number of processors being used and
+the settings of the <A HREF = "processors.html">processors</A> command.  The
+partitioning can later be changed by the <A HREF = "balance.html">balance</A> or
+<A HREF = "fix_balance.html">fix balance</A> commands.
 </P>
 <P>All properties of the atoms are replicated, including their
 velocities, which may or may not be desirable.  New atom IDs are
diff --git a/doc/replicate.txt b/doc/replicate.txt
index 54509e469c..e035bd2821 100644
--- a/doc/replicate.txt
+++ b/doc/replicate.txt
@@ -24,7 +24,12 @@ Replicate the current simulation one or more times in each dimension.
 For example, replication factors of 2,2,2 will create a simulation
 with 8x as many atoms by doubling the simulation domain in each
 dimension.  A replication factor of 1 in a dimension leaves the
-simulation domain unchanged.
+simulation domain unchanged.  When the new simulation box is created
+it is also partitioned into a regular 3d grid of rectangular bricks,
+one per processor, based on the number of processors being used and
+the settings of the "processors"_processors.html command.  The
+partitioning can later be changed by the "balance"_balance.html or
+"fix balance"_fix_balance.html commands.
 
 All properties of the atoms are replicated, including their
 velocities, which may or may not be desirable.  New atom IDs are

angle_coeff	angle_style	atom_modify	atom_style	balance	bond_coeff
bond_style	boundary	box	change_box	clear	communicate
compute	compute_modify	create_atoms	create_box	delete_atoms	delete_bonds
dielectric	dihedral_coeff	dihedral_style	dimension	displace_atoms	dump
dump image	dump_modify	dump movie	echo	fix	fix_modify
group	if	improper_coeff	improper_style	include	jump
kspace_modify	kspace_style	label	lattice	log	mass
minimize	min_modify	min_style	molecule	neb	neigh_modify
neighbor	newton	next	package	pair_coeff	pair_modify
pair_style	pair_write	partition	prd	print	processors
quit	read_data	read_dump	read_restart	region	replicate
rerun	reset_timestep	restart	run	run_style	set
shell	special_bonds	suffix	tad	temper	thermo
thermo_modify	thermo_style	timestep	uncompute	undump	unfix
units	variable	velocity	write_data	write_dump	write_restart +
bond_style	boundary	box	change_box	clear	comm_modify
comm_style	compute	compute_modify	create_atoms	create_box	delete_atoms
delete_bonds	dielectric	dihedral_coeff	dihedral_style	dimension	displace_atoms
dump	dump image	dump_modify	dump movie	echo	fix
fix_modify	group	if	improper_coeff	improper_style	include
jump	kspace_modify	kspace_style	label	lattice	log
mass	minimize	min_modify	min_style	molecule	neb
neigh_modify	neighbor	newton	next	package	pair_coeff
pair_modify	pair_style	pair_write	partition	prd	print
processors	quit	read_data	read_dump	read_restart	region
replicate	rerun	reset_timestep	restart	run	run_style
set	shell	special_bonds	suffix	tad	temper
thermo	thermo_modify	thermo_style	timestep	uncompute	undump
unfix	units	variable	velocity	write_data	write_dump
write_restart