#ifndef PARDENSEMATRIX_H
#define PARDENSEMATRIX_H

#include "MatrixDef.h"
|
|
|
|
#include "DenseMatrix.h"
|
|
|
|
#include "DenseVector.h"
|
|
|
|
#include "MPI_Wrappers.h"
|
|
|
|
|
|
|
|
#include "ATC_Error.h"
|
|
|
|
using ATC::ATC_Error;
|
|
|
|
|
|
|
|
#include <algorithm>
|
|
|
|
#include <sstream>
|
|
|
|
|
|
|
|
namespace ATC_matrix {

  /**
   *  @class  ParDenseMatrix
   *  @brief  Parallelized version of DenseMatrix class.
   */

  template <typename T>
  class ParDenseMatrix : public DenseMatrix<T> {
  public:
    MPI_Comm _comm;

    ParDenseMatrix(MPI_Comm comm, INDEX rows=0, INDEX cols=0, bool z=1)
      : DenseMatrix<T>(rows, cols, z), _comm(comm) {}
    ParDenseMatrix(MPI_Comm comm, const DenseMatrix<T>& c)
      : DenseMatrix<T>(c), _comm(comm) {}
    ParDenseMatrix(MPI_Comm comm, const SparseMatrix<T>& c)
      : DenseMatrix<T>(c), _comm(comm) {}
    ParDenseMatrix(MPI_Comm comm, const Matrix<T>& c)
      : DenseMatrix<T>(c), _comm(comm) {}

    ////////////////////////////////////////////////////////////////////////
    //* performs a matrix-vector multiply
    void ParMultMv(const Vector<T> &v,
                   DenseVector<T> &c, const bool At, T a, T b) const
    {
      // We can't generically support parallel multiplication because the MPI
      // data type must be specified for the element type T, so fall back to
      // the serial multiply.
      MultMv(*this, v, c, At, a, b);
    }
  };

  template<>
  class ParDenseMatrix<double> : public DenseMatrix<double> {
  public:
    MPI_Comm _comm;

    ParDenseMatrix(MPI_Comm comm, INDEX rows=0, INDEX cols=0, bool z=1)
      : DenseMatrix<double>(rows, cols, z), _comm(comm) {}
    ParDenseMatrix(MPI_Comm comm, const DenseMatrix<double>& c)
      : DenseMatrix<double>(c), _comm(comm) {}
    ParDenseMatrix(MPI_Comm comm, const SparseMatrix<double>& c)
      : DenseMatrix<double>(c), _comm(comm) {}
    ParDenseMatrix(MPI_Comm comm, const Matrix<double>& c)
      : DenseMatrix<double>(c), _comm(comm) {}

    //* performs a parallel matrix-vector multiply for double-precision data
    void ParMultMv(const Vector<double> &v, DenseVector<double> &c,
                   const bool At, double a, double b) const
    {
      // We don't support parallel vector-matrix (transpose) multiplication
      // yet, so fall back to the serial multiply.
      if (At) {
        MultMv(*this, v, c, At, a, b);
        return;
      }

      const INDEX nRows = this->nRows();
      const INDEX nCols = this->nCols();

      if (c.size() != nRows) {
        c.resize(nRows);   // set size of C
        c.zero();          // do not add result to C
      } else c *= b;

      // Determine how many rows will be handled on each processor
      int nProcs = MPI_Wrappers::size(_comm);
      int myRank = MPI_Wrappers::rank(_comm);

      int *majorCounts = new int[nProcs];
      int *offsets = new int[nProcs];

#ifdef COL_STORAGE // Column-major storage
      int nMajor = nCols;
      int nMinor = nRows;
      int ParDenseMatrix::*majorField = &ParDenseMatrix::_nCols;
      int ParDenseMatrix::*minorField = &ParDenseMatrix::_nRows;
#else // Row-major storage
      int nMajor = nRows;
      int nMinor = nCols;
      int ParDenseMatrix::*majorField = &ParDenseMatrix::_nRows;
      int ParDenseMatrix::*minorField = &ParDenseMatrix::_nCols;
#endif

      for (int i = 0; i < nProcs; i++) {
        // If the number of rows/cols does not divide evenly among the
        // processors, or there are too few rows/cols, some processors will
        // receive fewer rows/cols than others.
        offsets[i] = (i * nMajor) / nProcs;
        majorCounts[i] = (((i + 1) * nMajor) / nProcs) - offsets[i];
      }
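      // Example (hypothetical sizes): with nMajor = 10 and nProcs = 4 the
      // loop yields offsets = {0, 2, 5, 7} and majorCounts = {2, 3, 2, 3},
      // i.e. each processor owns a contiguous block of either
      // floor(nMajor/nProcs) or ceil(nMajor/nProcs) major slices.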

      int myNMajor = majorCounts[myRank];
      int myMajorOffset = offsets[myRank];

      // Take data from an offset version of A: A_local aliases this
      // processor's contiguous block of major slices in A's storage
      // (no entries are copied).
      ParDenseMatrix<double> A_local(_comm);
      A_local._data = this->_data + myMajorOffset * nMinor;
      A_local.*majorField = myNMajor;
      A_local.*minorField = nMinor;

#ifdef COL_STORAGE // Column-major storage

      // When splitting by columns, we split the vector as well, and sum the
      // results.
      DenseVector<double> v_local(myNMajor);
      for (int i = 0; i < myNMajor; i++)
        v_local(i) = v(myMajorOffset + i);

      // Store results in a local vector
      DenseVector<double> c_local = A_local * v_local;

      // Sum all vectors onto each processor
      MPI_Wrappers::allsum(_comm, c_local.ptr(), c.ptr(), c_local.size());
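      // Note: each rank's c_local is a full-length partial product,
      // A(:, j_p .. j_p + n_p - 1) * v(j_p .. j_p + n_p - 1); summing these
      // element-wise across all ranks gives A * v, which allsum writes
      // into c on every processor.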

#else // Row-major storage

      // When splitting by rows, we use the whole vector and concatenate the
      // results.

      // Store results in a small local vector
      DenseVector<double> c_local(myNMajor);
      for (int i = 0; i < myNMajor; i++)
        c_local(i) = c(myMajorOffset + i);

      MultMv(A_local, v, c_local, At, a, b);

      // Gather the results onto each processor
      MPI_Wrappers::allgatherv(_comm, c_local.ptr(), c_local.size(), c.ptr(),
                               majorCounts, offsets);
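      // The majorCounts/offsets arrays computed above double as the receive
      // counts and displacements of the gather, so rank p's c_local lands in
      // c(offsets[p] .. offsets[p] + majorCounts[p] - 1).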

#endif

      // Clear out the local matrix's pointer so we don't double-free
      A_local._data = NULL;

      delete [] majorCounts;
      delete [] offsets;
    }
  };

  // Operator for dense Matrix - dense vector product
  template<typename T>
  DenseVector<T> operator*(const ParDenseMatrix<T> &A, const Vector<T> &b)
  {
    DenseVector<T> c;
    A.ParMultMv(b, c, 0, 1.0, 0.0);
    return c;
  }
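  // Usage sketch (hypothetical driver code; "comm", "K", and "x" are
  // placeholder names for an MPI communicator, a filled DenseMatrix<double>,
  // and a DenseVector<double>):
  //
  //   ParDenseMatrix<double> A(comm, K);
  //   DenseVector<double> y = A * x;   // parallel product via ParMultMv
  //
  // Because the partial results are combined with collective MPI calls,
  // every rank in comm must execute the product with the same operands.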

} // end namespace ATC_matrix

#endif // PARDENSEMATRIX_H