Merge branch 'master' into progguide-axel

2020-09-30 19:57:20 -04:00 · 2020-09-30 19:57:20 -04:00 · a9b27caf17
parent 38d954e038 6928e79883
commit a9b27caf17
27 changed files with 97 additions and 299 deletions
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@ -114,6 +114,7 @@ src/info.*                @akohlmey @rbberger
 src/timer.*               @akohlmey
 src/min*                  @sjplimp @stanmoore1
 src/utils.*               @akohlmey @rbberger
+src/math_eigen_impl.h     @jewettaij

 # tools
 tools/msi2lmp/*       @akohlmey
@ -134,6 +135,9 @@ cmake/presets/*.cmake @junghans @rbberger @akohlmey
 # python
 python/*              @rbberger

+# fortran
+fortran/*             @akohlmey
+
 # docs
 doc/utils/*/*         @rbberger
 doc/Makefile          @rbberger
--- a/cmake/presets/intel.cmake
+++ b/cmake/presets/intel.cmake
@ -1,7 +1,8 @@
-# preset that will enable clang/clang++ with support for MPI and OpenMP (on Linux boxes)
+# preset that will enable Intel compilers with support for MPI and OpenMP (on Linux boxes)

 set(CMAKE_CXX_COMPILER "icpc" CACHE STRING "" FORCE)
 set(CMAKE_C_COMPILER "icc" CACHE STRING "" FORCE)
+set(CMAKE_Fortran_COMPILER "ifort" CACHE STRING "" FORCE)
 set(MPI_CXX "icpc" CACHE STRING "" FORCE)
 set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)
 unset(HAVE_OMP_H_INCLUDE CACHE)
@ -12,5 +13,6 @@ set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE)
 set(OpenMP_CXX "icpc" CACHE STRING "" FORCE)
 set(OpenMP_CXX_FLAGS "-qopenmp" CACHE STRING "" FORCE)
 set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
+set(OpenMP_Fortran_FLAGS "-qopenmp" CACHE STRING "" FORCE)
 set(OpenMP_omp_LIBRARY "libiomp5.so" CACHE PATH "" FORCE)

--- a/doc/src/Build_basics.rst
+++ b/doc/src/Build_basics.rst
@ -159,11 +159,11 @@ others (e.g. GCC version 9 and beyond, Clang version 10 and later) may
 implement strict OpenMP 4.0 and later semantics, which are incompatible
 with the OpenMP 3.1 semantics used in LAMMPS for maximal compatibility
 with compiler versions in use.  If compilation with OpenMP enabled fails
-because of your compiler requiring strict OpenMP 4.0 semantic, you can
+because of your compiler requiring strict OpenMP 4.0 semantics, you can
 change the behavior by adding ``-D LAMMPS_OMP_COMPAT=4`` to the
 ``LMP_INC`` variable in your makefile, or add it to the command line
-while configuring with CMake. CMake will detect the suitable setting for
-the GNU, Clang, and Intel compilers.
+while configuring with CMake.  LAMMPS will autodetect a suitable setting
+for most GNU, Clang, and Intel compilers.

 ----------

--- a/doc/src/Tools.rst
+++ b/doc/src/Tools.rst
@ -663,6 +663,7 @@ The singularity sub-directory contains container definitions files
 that can be used to build container images for building and testing
 LAMMPS on specific OS variants using the `Singularity <https://sylabs.io>`_
 container software. Contributions for additional variants are welcome.
+For more details please see the README.md file in that folder.

 ----------

--- a/src/ASPHERE/pair_line_lj.cpp
+++ b/src/ASPHERE/pair_line_lj.cpp
@ -74,7 +74,6 @@ void PairLineLJ::compute(int eflag, int vflag)
  double xi[2],xj[2],fi[2],dxi,dxj,dyi,dyj;
  int *ilist,*jlist,*numneigh,**firstneigh;

-  evdwl = 0.0;
  ev_init(eflag,vflag);

  double **x = atom->x;
--- a/src/ASPHERE/pair_tri_lj.cpp
+++ b/src/ASPHERE/pair_tri_lj.cpp
@ -74,7 +74,6 @@ void PairTriLJ::compute(int eflag, int vflag)
  double dc1[3],dc2[3],dc3[3];
  int *ilist,*jlist,*numneigh,**firstneigh;

-  evdwl = 0.0;
  ev_init(eflag,vflag);

  AtomVecTri::Bonus *bonus = avec->bonus;
--- a/src/BODY/compute_body_local.cpp
+++ b/src/BODY/compute_body_local.cpp
@ -123,7 +123,7 @@ void ComputeBodyLocal::compute_local()
  int ncount = compute_body(0);
  if (ncount > nmax) reallocate(ncount);
  size_local_rows = ncount;
-  ncount = compute_body(1);
+  compute_body(1);
 }

 /* ----------------------------------------------------------------------
--- a/src/BODY/compute_temp_body.cpp
+++ b/src/BODY/compute_temp_body.cpp
@ -343,7 +343,6 @@ void ComputeTempBody::compute_vector()

        inertia = bonus[body[i]].inertia;
        quat = bonus[body[i]].quat;
-        massone = rmass[i];

        // wbody = angular velocity in body frame

--- a/src/CLASS2/angle_class2.cpp
+++ b/src/CLASS2/angle_class2.cpp
@ -447,10 +447,6 @@ double AngleClass2::single(int type, int i1, int i2, int i3)
  if (c > 1.0) c = 1.0;
  if (c < -1.0) c = -1.0;

-  double s = sqrt(1.0 - c*c);
-  if (s < SMALL) s = SMALL;
-  s = 1.0/s;
-
  double dtheta = acos(c) - theta0[type];
  double dtheta2 = dtheta*dtheta;
  double dtheta3 = dtheta2*dtheta;
--- a/src/SPIN/neb_spin.cpp
+++ b/src/SPIN/neb_spin.cpp
@ -298,7 +298,7 @@ void NEBSpin::run()
        fprintf(uscreen,"Step MaxReplicaTorque MaxAtomTorque "
                "GradV0 GradV1 GradVc EBF EBR RDT "
                "RD1 PE1 RD2 PE2 ... RDN PEN "
-                "GradV0dottan DN0... GradVNdottan DNN\n");
+                "GradV0dottan DN0 ... GradVNdottan DNN\n");
      } else {
        fprintf(uscreen,"Step MaxReplicaTorque MaxAtomTorque "
                "GradV0 GradV1 GradVc "
--- a/src/SPIN/pair_spin_dipole_cut.cpp
+++ b/src/SPIN/pair_spin_dipole_cut.cpp
@ -379,7 +379,7 @@ void PairSpinDipoleCut::compute_dipolar(int /* i */, int /* j */, double eij[3],
 ------------------------------------------------------------------------- */

 void PairSpinDipoleCut::compute_dipolar_mech(int /* i */, int /* j */, double eij[3],
-    double fi[3], double spi[3], double spj[3], double r2inv)
+    double fi[3], double spi[4], double spj[4], double r2inv)
 {
  double sisj,sieij,sjeij;
  double gigjri4,bij,pre;
--- a/src/SPIN/pair_spin_dipole_long.cpp
+++ b/src/SPIN/pair_spin_dipole_long.cpp
@ -453,7 +453,7 @@ void PairSpinDipoleLong::compute_long(int /* i */, int /* j */, double eij[3],
 ------------------------------------------------------------------------- */

 void PairSpinDipoleLong::compute_long_mech(int /* i */, int /* j */, double eij[3],
-    double bij[4], double fi[3], double spi[3], double spj[3])
+    double bij[4], double fi[3], double spi[4], double spj[4])
 {
  double sisj,sieij,sjeij,b2,b3;
  double g1,g2,g1b2_g2b3,gigj,pre;
--- a/src/USER-INTEL/fix_intel.cpp
+++ b/src/USER-INTEL/fix_intel.cpp
@ -160,9 +160,9 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg)

  // if using LRT mode, create the integrate style
  if (_lrt) {
-    char *str;
-    str = (char *) "verlet/lrt/intel";
-    update->create_integrate(1,&str,0);
+    char *cmd[1];
+    cmd[0] = (char *) "verlet/lrt/intel";
+    update->create_integrate(1,cmd,0);
  }

  // error check
--- a/src/USER-INTEL/intel_intrinsics.h
+++ b/src/USER-INTEL/intel_intrinsics.h
@ -35,114 +35,10 @@
 #include <fvec.h>
 #endif

-// Vector classes for Cilk array notation
-// This is experimental and doesn't yield good code yet
-template<int VL, typename fscal>
-struct lmp_intel_an_fvec {
-    fscal data[VL];
-    lmp_intel_an_fvec() {}
-    explicit lmp_intel_an_fvec(const fscal f) { data[:] = f; }
-    explicit lmp_intel_an_fvec(fscal f[VL]) { data[:] = f[:]; }
-    lmp_intel_an_fvec(const lmp_intel_an_fvec &a) { data[:] = a.data[:]; }
-    lmp_intel_an_fvec& operator =(const lmp_intel_an_fvec &a) { data[:] = a.data[:]; return *this; }
-    const lmp_intel_an_fvec operator +(const lmp_intel_an_fvec &b) const {
-        lmp_intel_an_fvec ret = *this;
-        ret.data[:] += b.data[:];
-        return ret;
-    }
-    const lmp_intel_an_fvec operator -(const lmp_intel_an_fvec &b) const {
-        lmp_intel_an_fvec ret = *this;
-        ret.data[:] -= b.data[:];
-        return ret;
-    }
-    const lmp_intel_an_fvec operator *(const lmp_intel_an_fvec &b) const {
-        lmp_intel_an_fvec ret = *this;
-        ret.data[:] *= b.data[:];
-        return ret;
-    }
-    const lmp_intel_an_fvec operator /(const lmp_intel_an_fvec &b) const {
-        lmp_intel_an_fvec ret = *this;
-        ret.data[:] /= b.data[:];
-        return ret;
-    }
-    lmp_intel_an_fvec& operator +=(const lmp_intel_an_fvec &b) {
-        data[:] += b.data[:]; return *this;
-    }
-    lmp_intel_an_fvec& operator -=(const lmp_intel_an_fvec &b) {
-        data[:] -= b.data[:]; return *this;
-    }
-    lmp_intel_an_fvec& operator *=(const lmp_intel_an_fvec &b) {
-        data[:] *= b.data[:]; return *this;
-    }
-    lmp_intel_an_fvec& operator /=(const lmp_intel_an_fvec &b) {
-        data[:] /= b.data[:]; return *this;
-    }
-    friend lmp_intel_an_fvec sqrt(const lmp_intel_an_fvec &a) __attribute__((always_inline)) {
-        lmp_intel_an_fvec ret; ret.data[:] = sqrt(a.data[:]); return ret;
-    }
-    friend lmp_intel_an_fvec exp(const lmp_intel_an_fvec &a) __attribute__((always_inline)) {
-        lmp_intel_an_fvec ret; ret.data[:] = exp(a.data[:]); return ret;
-    }
-    friend lmp_intel_an_fvec sin(const lmp_intel_an_fvec &a) __attribute__((always_inline)) {
-        lmp_intel_an_fvec ret; ret.data[:] = sin(a.data[:]); return ret;
-    }
-    friend lmp_intel_an_fvec invsqrt(const lmp_intel_an_fvec &a) __attribute__((always_inline)) {
-        lmp_intel_an_fvec ret; ret.data[:] = ((fscal)1.) / sqrt(a.data[:]); return ret;
-    }
-    friend lmp_intel_an_fvec pow(const lmp_intel_an_fvec &a, const lmp_intel_an_fvec &b) __attribute__((always_inline)) {
-        lmp_intel_an_fvec ret; ret.data[:] = pow(a.data[:], b.data[:]); return ret;
-    }
-    lmp_intel_an_fvec operator - () const {
-        lmp_intel_an_fvec ret; ret.data[:] = - data[:]; return ret;
-    }
-};
-template<int VL>
-struct lmp_intel_an_ivec {
-    int data[VL];
-    lmp_intel_an_ivec() {}
-    explicit lmp_intel_an_ivec(int i) { data[:] = i; }
-    explicit lmp_intel_an_ivec(const int * a) { data[:] = a[0:VL]; }
-    const lmp_intel_an_ivec operator &(const lmp_intel_an_ivec &b) {
-        lmp_intel_an_ivec ret = *this;
-        ret.data[:] &= b.data[:];
-        return ret;
-    }
-    const lmp_intel_an_ivec operator |(const lmp_intel_an_ivec &b) {
-        lmp_intel_an_ivec ret = *this;
-        ret.data[:] |= b.data[:];
-        return ret;
-    }
-    const lmp_intel_an_ivec operator +(const lmp_intel_an_ivec &b) {
-        lmp_intel_an_ivec ret = *this;
-        ret.data[:] += b.data[:];
-        return ret;
-    }
-};
-template<int VL>
-struct lmp_intel_an_bvec {
-    bool data[VL];
-    lmp_intel_an_bvec() {}
-    lmp_intel_an_bvec(const lmp_intel_an_bvec &a) { data[:] = a.data[:]; }
-    lmp_intel_an_bvec& operator =(const lmp_intel_an_bvec &a) { data[:] = a.data[:]; return *this; }
-    explicit lmp_intel_an_bvec(int i) { data[:] = i; }
-    friend lmp_intel_an_bvec operator &(const lmp_intel_an_bvec &a, const lmp_intel_an_bvec &b) __attribute__((always_inline)) {
-        lmp_intel_an_bvec ret; ret.data[:] = a.data[:] & b.data[:]; return ret;
-    }
-    friend lmp_intel_an_bvec operator |(const lmp_intel_an_bvec &a, const lmp_intel_an_bvec &b) __attribute__((always_inline)) {
-        lmp_intel_an_bvec ret; ret.data[:] = a.data[:] | b.data[:]; return ret;
-    }
-    friend lmp_intel_an_bvec operator ~(const lmp_intel_an_bvec &a) __attribute__((always_inline)) {
-        lmp_intel_an_bvec ret; ret.data[:] = ! a.data[:]; return ret;
-    }
-    lmp_intel_an_bvec& operator &=(const lmp_intel_an_bvec &a) __attribute__((always_inline)) {
-        data[:] &= a.data[:]; return *this;
-    }
-};
-
 namespace lmp_intel {

-// Self explanatory mostly, KNC=IMCI and AVX-512, NONE=Scalar, AN=Array Not.
-enum CalculationMode { KNC, AVX, AVX2, SSE, NONE, AN };
+// Self explanatory mostly, KNC=IMCI and AVX-512, NONE=Scalar.
+enum CalculationMode {KNC, AVX, AVX2, SSE, NONE};
 #ifdef __MIC__
  #ifdef LMP_INTEL_VECTOR_MIC
  static const CalculationMode mode = LMP_INTEL_VECTOR_MIC;
@ -1916,148 +1812,6 @@ struct vector_ops<flt_t, NONE> {
    }
 };

-// Array notation implementation
-template<class flt_t>
-struct vector_ops<flt_t, AN> {
-    static const int VL = 4;
-    typedef flt_t fscal;
-    typedef lmp_intel_an_fvec<VL, fscal> fvec;
-    typedef lmp_intel_an_ivec<VL> ivec;
-    typedef lmp_intel_an_bvec<VL> bvec;
-    typedef flt_t farr[VL];
-    typedef int iarr[VL];
-    static fvec recip(const fvec &a) {
-        fvec ret; ret.data[:] = ((fscal)1.) / a.data[:]; return ret;
-    }
-    template<int scale>
-    static void gather_prefetch_t0(const ivec &idx, const bvec &mask, const void *base) {
-      // nop
-    }
-    template<int scale>
-    static fvec gather(const fvec &from, const bvec &mask, const ivec &idx, const void *base) {
-      fvec ret = from;
-      if (mask.data[:]) ret.data[:] = *reinterpret_cast<const fscal *>(reinterpret_cast<const char*>(base) + scale * idx.data[:]);
-      return ret;
-    }
-    template<class T>
-    static void gather_x(const ivec &idxs, const bvec &mask, const T *base, fvec *x, fvec *y, fvec *z, ivec *w) {
-      *x = gather<1>(*x, mask, idxs, &base->x);
-      *y = gather<1>(*y, mask, idxs, &base->y);
-      *z = gather<1>(*z, mask, idxs, &base->z);
-      *w = int_gather<1>(*w, mask, idxs, &base->w);
-    }
-    static void gather_8(const ivec &idxs, const bvec &mask, const void *base,
-        fvec *r0, fvec *r1, fvec *r2, fvec *r3, fvec *r4, fvec *r5, fvec *r6, fvec *r7) {
-      fvec a = zero(), b = zero(), c = zero(), d = zero();
-      gather_4(idxs, mask, base, r0, r1, r2, r3);
-      gather_4(idxs, mask, reinterpret_cast<const char*>(base) + 4 * sizeof(fscal), r4, r5, r6, r7);
-    }
-    static void gather_4(const ivec &idxs, const bvec &mask, const void *base,
-        fvec *r0, fvec *r1, fvec *r2, fvec *r3) {
-      *r0 = gather<4>(*r0, mask, idxs, reinterpret_cast<const char*>(base) +  0 * sizeof(fscal));
-      *r1 = gather<4>(*r1, mask, idxs, reinterpret_cast<const char*>(base) +  1 * sizeof(fscal));
-      *r2 = gather<4>(*r2, mask, idxs, reinterpret_cast<const char*>(base) +  2 * sizeof(fscal));
-      *r3 = gather<4>(*r3, mask, idxs, reinterpret_cast<const char*>(base) +  3 * sizeof(fscal));
-    }
-    static fvec blend(const bvec &mask, const fvec &a, const fvec &b) {
-      fvec ret = a;
-      if (mask.data[:]) ret.data[:] = b.data[:];
-      return ret;
-    }
-    static ivec int_blend(const bvec &mask, const ivec &a, const ivec &b) {
-      fvec ret = a;
-      if (mask.data[:]) ret.data[:] = b.data[:];
-      return ret;
-    }
-    static fvec fmadd(const fvec &a, const fvec &b, const fvec &c) {
-      fvec ret; ret.data[:] = a.data[:] * b.data[:] + c.data[:]; return ret;
-    }
-    static fvec zero() {
-      return fvec(0.);
-    }
-    static bvec cmpeq(const fvec &a, const fvec &b) {
-      bvec ret; ret.data[:] = a.data[:] == b.data[:]; return ret;
-    }
-    static bvec cmpnle(const fvec &a, const fvec &b) {
-      bvec ret; ret.data[:] = !(a.data[:] <= b.data[:]); return ret;
-    }
-    static bvec cmple(const fvec &a, const fvec &b) {
-      bvec ret; ret.data[:] = a.data[:] <= b.data[:]; return ret;
-    }
-    static bvec cmplt(const fvec &a, const fvec &b) {
-      bvec ret; ret.data[:] = a.data[:] < b.data[:]; return ret;
-    }
-    static bvec int_cmpneq(const ivec &a, const ivec &b) {
-      bvec ret; ret.data[:] = a.data[:] != b.data[:]; return ret;
-    }
-    static bvec int_cmplt(const ivec &a, const ivec &b) {
-      bvec ret; ret.data[:] = a.data[:] < b.data[:]; return ret;
-    }
-    static fvec invsqrt(const fvec &a) {
-      fvec ret; ret.data[:] = ((fscal)1.) / sqrt(a.data[:]); return ret;
-    }
-    static fvec sincos(fvec *c, const fvec &a) {
-      c->data[:] = cos(a.data[:]);
-      fvec ret; ret.data[:] = sin(a.data[:]); return ret;
-    }
-    static fscal reduce_add(const fvec &a) {
-      return __sec_reduce_add(a.data[:]);
-    }
-    static ivec int_mullo(const ivec &a, const ivec &b) {
-      ivec ret; ret.data[:] = a.data[:] * b.data[:]; return ret;
-    }
-    static ivec int_mask_add(const ivec &src, const bvec &mask, const ivec &a, const ivec &b) {
-      ivec ret = src;
-      if (mask.data[:]) ret.data[:] = a.data[:] + b.data[:];
-      return ret;
-    }
-    template<int scale>
-    static ivec int_gather(const ivec &from, bvec mask, const ivec &idx, const void *base) {
-      ivec ret = from;
-      if (mask.data[:]) ret.data[:] = reinterpret_cast<const int*>(base)[scale * idx.data[:] / sizeof(int)];
-      return ret;
-    }
-    static fvec mask_add(const fvec &src, const bvec &mask, const fvec &a, const fvec &b) {
-      fvec ret = src;
-      if (mask.data[:]) ret.data[:] = a.data[:] + b.data[:];
-      return ret;
-    }
-    static void store(void *at, const fvec &a) {
-      reinterpret_cast<fscal*>(at)[0:VL] = a.data[:];
-    }
-    static void int_store(int *at, const ivec &a) {
-      reinterpret_cast<int*>(at)[0:VL] = a.data[:];
-    }
-    static void mask_store(int *at, const bvec &a) {
-      at[0:VL] = a.data[:];
-    }
-    static fvec min(const fvec &a, const fvec &b) {
-      fvec ret = b;
-      if (a.data[:] < b.data[:]) ret.data[:] = a.data[:];
-      return ret;
-    }
-    static bool mask_test_at(const bvec &mask, int at) {
-      return mask.data[at];
-    }
-    static bool mask_testz(const bvec &mask) {
-      return ! __sec_reduce_or(mask.data[:]);
-    }
-    static bvec mask_enable_lower(int n) {
-      bvec ret; ret.data[:] = __sec_implicit_index(0) < n; return ret;
-    }
-    static ivec int_load_vl(const int *a) {
-      return ivec(a);
-    }
-    static void int_clear_arr(int *a) {
-      a[0:VL] = 0;
-    }
-    static bvec full_mask() {
-      return bvec(1);
-    }
-    static void int_print(const ivec &a) {
-    }
-};
-
 // Mixins to implement mixed precision and single/single and double/double
 // This one is for single/single and double/double
 template<class BASE_flt_t, CalculationMode BASE_mic>
@ -2138,7 +1892,7 @@ struct AccumulatorTwiceMixin {
 };

 // For cases where vector_ops<float,x>::VL == vector_ops<double,x>::VL
-// i.e. scalar & AN
+// i.e. scalar
 template<class BASE_flt_t, class HIGH_flt_t, CalculationMode mic>
 struct AccumulatorTwiceMixinNone {
  typedef vector_ops<BASE_flt_t, mic> BASE;
@ -2177,11 +1931,8 @@ struct vector_routines<float,float,mic> : public vector_ops<float, mic>, public
 template<CalculationMode mic>
 struct vector_routines<float,double,mic> : public vector_ops<float, mic>, public AccumulatorTwiceMixin<float,double, mic> {};

-// Specialize for AN and scalar
+// Specialize for scalar
 template<>
 struct vector_routines<float,double,NONE> : public vector_ops<float, NONE>, public AccumulatorTwiceMixinNone<float,double, NONE> {};

-template<>
-struct vector_routines<float,double,AN> : public vector_ops<float, AN>, public AccumulatorTwiceMixinNone<float,double, AN> {};
-
 } // namespace lmp_intel
--- a/src/USER-MISC/bond_special.cpp
+++ b/src/USER-MISC/bond_special.cpp
@ -184,8 +184,8 @@ void BondSpecial::read_restart(FILE *fp)
  allocate();

  if (comm->me == 0) {
-    fread(&factor_lj[1],sizeof(double),atom->nbondtypes,fp);
-    fread(&factor_coul[1],sizeof(double),atom->nbondtypes,fp);
+    utils::sfread(FLERR,&factor_lj[1],sizeof(double),atom->nbondtypes,fp,nullptr,error);
+    utils::sfread(FLERR,&factor_coul[1],sizeof(double),atom->nbondtypes,fp,nullptr,error);
  }
  MPI_Bcast(&factor_lj[1],atom->nbondtypes,MPI_DOUBLE,0,world);
  MPI_Bcast(&factor_coul[1],atom->nbondtypes,MPI_DOUBLE,0,world);
--- a/src/USER-MISC/fix_propel_self.cpp
+++ b/src/USER-MISC/fix_propel_self.cpp
@ -160,13 +160,13 @@ template <int filter_by_type>
 void FixPropelSelf::post_force_quaternion(int /* vflag */ )
 {
  double **f = atom->f;
-  AtomVecEllipsoid *av = static_cast<AtomVecEllipsoid*>(atom->avec);

  int *mask = atom->mask;
  int nlocal = atom->nlocal;
  int *type = atom->type;
  int* ellipsoid = atom->ellipsoid;

+  AtomVecEllipsoid *av = static_cast<AtomVecEllipsoid*>(atom->style_match("ellipsoid"));
  AtomVecEllipsoid::Bonus *bonus = av->bonus;

  // Add the active force to the atom force:
--- a/src/USER-MISC/pair_coul_slater_cut.cpp
+++ b/src/USER-MISC/pair_coul_slater_cut.cpp
@ -152,10 +152,10 @@ void PairCoulSlaterCut::write_restart_settings(FILE *fp)
 void PairCoulSlaterCut::read_restart_settings(FILE *fp)
 {
  if (comm->me == 0) {
-    fread(&cut_global,sizeof(double),1,fp);
-    fread(&lamda,sizeof(double),1,fp);
-    fread(&offset_flag,sizeof(int),1,fp);
-    fread(&mix_flag,sizeof(int),1,fp);
+    utils::sfread(FLERR,&cut_global,sizeof(double),1,fp,nullptr,error);
+    utils::sfread(FLERR,&lamda,sizeof(double),1,fp,nullptr,error);
+    utils::sfread(FLERR,&offset_flag,sizeof(int),1,fp,nullptr,error);
+    utils::sfread(FLERR,&mix_flag,sizeof(int),1,fp,nullptr,error);
  }
  MPI_Bcast(&cut_global,1,MPI_DOUBLE,0,world);
  MPI_Bcast(&lamda,1,MPI_DOUBLE,0,world);
--- a/src/USER-OMP/fix_omp.cpp
+++ b/src/USER-OMP/fix_omp.cpp
@ -78,10 +78,10 @@ FixOMP::FixOMP(LAMMPS *lmp, int narg, char **arg)
 #endif
  }

+#if defined(_OPENMP)
  if (nthreads < 1)
    error->all(FLERR,"Illegal number of OpenMP threads requested");

-#if defined(_OPENMP)
  int reset_thr = 0;
 #endif
  if (nthreads != comm->nthreads) {
--- a/src/USER-UEF/compute_pressure_uef.cpp
+++ b/src/USER-UEF/compute_pressure_uef.cpp
@ -78,7 +78,7 @@ double ComputePressureUef::compute_scalar()
  addstep(update->ntimestep+1);

  int k =0;
-  scalar = 0;
+  scalar = 0.0;
  if (ext_flags[0]) {
    scalar += vector[0];
    k++;
@ -92,7 +92,7 @@ double ComputePressureUef::compute_scalar()
    k++;
  }

-  scalar /= k;
+  if (k > 1) scalar /= k;
  return scalar;
 }

--- a/src/change_box.cpp
+++ b/src/change_box.cpp
@ -176,7 +176,7 @@ void ChangeBox::command(int narg, char **arg)

  int move_atoms = 0;
  for (int m = 0; m < nops; m++) {
-    if (ops[m].style != ORTHO || ops[m].style != TRICLINIC) move_atoms = 1;
+    if (ops[m].style != ORTHO && ops[m].style != TRICLINIC) move_atoms = 1;
  }

  // error if moving atoms and there is stored per-atom restart state
--- a/src/compute_pressure.cpp
+++ b/src/compute_pressure.cpp
@ -36,7 +36,7 @@ using namespace LAMMPS_NS;

 ComputePressure::ComputePressure(LAMMPS *lmp, int narg, char **arg) :
  Compute(lmp, narg, arg),
-  vptr(nullptr), id_temp(nullptr)
+  vptr(nullptr), id_temp(nullptr), pstyle(nullptr)
 {
  if (narg < 4) error->all(FLERR,"Illegal compute pressure command");
  if (igroup) error->all(FLERR,"Compute pressure must use group all");
@ -146,6 +146,7 @@ ComputePressure::~ComputePressure()
  delete [] id_temp;
  delete [] vector;
  delete [] vptr;
+  delete [] pstyle;
 }

 /* ---------------------------------------------------------------------- */
--- a/src/input.cpp
+++ b/src/input.cpp
@ -1234,12 +1234,11 @@ void Input::shell()
  } else if (strcmp(arg[0],"putenv") == 0) {
    if (narg < 2) error->all(FLERR,"Illegal shell putenv command");
    for (int i = 1; i < narg; i++) {
-      char *ptr = strdup(arg[i]);
      rv = 0;
 #ifdef _WIN32
-      if (ptr != nullptr) rv = _putenv(ptr);
+      if (arg[i]) rv = _putenv(arg[i]);
 #else
-      if (ptr != nullptr) rv = putenv(ptr);
+      if (arg[i]) rv = putenv(arg[i]);
 #endif
      rv = (rv < 0) ? errno : 0;
      MPI_Reduce(&rv,&err,1,MPI_INT,MPI_MAX,0,world);
--- a/src/math_eigen_impl.h
+++ b/src/math_eigen_impl.h
@ -849,6 +849,8 @@ void Jacobi<Scalar, Vector, Matrix, ConstMatrix>::
 Dealloc() {
  //assert(! is_preallocated);
  Dealloc2D(&M);
+  delete[] max_idx_row;
+  max_idx_row = nullptr;
  Init();
 }

--- a/src/omp_compat.h
+++ b/src/omp_compat.h
@ -25,11 +25,30 @@
 // so this is what LAMMPS primarily uses.  For those compilers
 // that strictly implement OpenMP 4.0 (such as GCC 9.0 and later
 // or Clang 10.0 and later), we give up default(none).
-#if LAMMPS_OMP_COMPAT == 4
-#    define LMP_SHARED(...)
-#    define LMP_DEFAULT_NONE default(shared)
-#else
-#    define LMP_SHARED(...) shared(__VA_ARGS__)
-#    define LMP_DEFAULT_NONE default(none)
+
+// autodetect OpenMP compatibility if not explicitly set
+
+#ifndef LAMMPS_OMP_COMPAT
+#  if defined(__INTEL_COMPILER)
+#    if __INTEL_COMPILER > 18
+#      define LAMMPS_OMP_COMPAT 4
+#    endif
+#  elif defined(__clang__)
+#    if __clang_major__ >= 10
+#      define LAMMPS_OMP_COMPAT 4
+#    endif
+#  elif defined(__GNUC__)
+#    if __GNUC__ >= 9
+#      define LAMMPS_OMP_COMPAT 4
+#    endif
+#  endif
+#endif
+
+#if LAMMPS_OMP_COMPAT == 4
+#  define LMP_SHARED(...)
+#  define LMP_DEFAULT_NONE default(shared)
+#else
+#  define LMP_SHARED(...) shared(__VA_ARGS__)
+#  define LMP_DEFAULT_NONE default(none)
 #endif

--- a/src/reset_atom_ids.cpp
+++ b/src/reset_atom_ids.cpp
@ -368,9 +368,9 @@ void ResetIDs::sort()
  // bins are numbered from 0 to Nbins-1

  bigint nbins = (bigint) nbinx*nbiny*nbinz;
-  int nlo = nbins / nprocs;
-  int nhi = nlo + 1;
-  int nplo = nprocs - (nbins % nprocs);
+  bigint nlo = nbins / nprocs;
+  bigint nhi = nlo + 1;
+  bigint nplo = nprocs - (nbins % nprocs);
  bigint nbinlo = nplo*nlo;

  if (me < nplo) {
--- a/tools/singularity/README.md
+++ b/tools/singularity/README.md
@ -5,12 +5,12 @@ for [Singularity](https://sylabs.io), suitable for compiling and testing
 LAMMPS on a variety of OS variants with support for most standard
 packages and - for some of them - also building/spellchecking the manual
 in all supported formats. This allows to test and debug LAMMPS code on
-different OS variants than what is locally installed on your development
+different OS variants without doing a full installation on your development
 workstation, e.g. when bugs are reported that can only be reproduced on
 a specific OS or with specific (mostly older) versions of tools,
 compilers, or libraries.

-Ready-to-use container images built from these definition files are
+Ready-to-use container images built from some of these definition files are
 occasionally uploaded to the container library at sylabs.io. They
 can be found here: https://cloud.sylabs.io/library/lammps/default/lammps_development#
 and will be signed with a GPG key that has the fingerprint:
@ -25,7 +25,7 @@ git clone --depth 500  git://github.com/lammps/lammps.git lammps
 mkdir build-centos7
 cd build-centos7
 sudo singularity build centos7.sif ../tools/singularity/centos7.def
-singularity shell centos7.sif
+singularity exec centos7.sif bash --login
 cmake -C ../cmake/presets/most.cmake ../cmake
 make
 ```
@ -39,7 +39,7 @@ git clone --depth 500  git://github.com/lammps/lammps.git lammps
 mkdir build-ubuntu18
 cd build-ubuntu18
 singularity pull library://lammps/default/lammps_development:ubuntu18.04
-singularity shell lammps_development_ubuntu18.04.sif
+singularity exec lammps_development_ubuntu18.04.sif bash --login
 cmake -C ../cmake/presets/most.cmake ../cmake
 make
 ```
--- a/unittest/commands/test_simple_commands.cpp
+++ b/unittest/commands/test_simple_commands.cpp
@ -312,6 +312,32 @@ TEST_F(SimpleCommandsTest, Units)

    TEST_FAILURE(".*ERROR: Illegal units command.*", lmp->input->one("units unknown"););
 }
+
+TEST_F(SimpleCommandsTest, Shell)
+{
+    if (!verbose) ::testing::internal::CaptureStdout();
+    lmp->input->one("shell putenv TEST_VARIABLE=simpletest");
+    if (!verbose) ::testing::internal::GetCapturedStdout();
+
+    char * test_var = getenv("TEST_VARIABLE");
+    ASSERT_NE(test_var, nullptr);
+    ASSERT_THAT(test_var, StrEq("simpletest"));
+
+    if (!verbose) ::testing::internal::CaptureStdout();
+    lmp->input->one("shell putenv TEST_VARIABLE=simpletest");
+    lmp->input->one("shell putenv TEST_VARIABLE2=simpletest2 OTHER_VARIABLE=2");
+    if (!verbose) ::testing::internal::GetCapturedStdout();
+
+    char * test_var2 = getenv("TEST_VARIABLE2");
+    char * other_var = getenv("OTHER_VARIABLE");
+
+    ASSERT_NE(test_var2, nullptr);
+    ASSERT_THAT(test_var2, StrEq("simpletest2"));
+
+    ASSERT_NE(other_var, nullptr);
+    ASSERT_THAT(other_var, StrEq("2"));
+}
+
 } // namespace LAMMPS_NS

 int main(int argc, char **argv)