lammps/lib/atc/ParDenseMatrix.h

173 lines
5.0 KiB
C
Raw Normal View History

#ifndef PARDENSEMATRIX_H
#define PARDENSEMATRIX_H
#include "MatrixDef.h"
#include "DenseMatrix.h"
#include "DenseVector.h"
#include "MPI_Wrappers.h"
#include "ATC_Error.h"
using ATC::ATC_Error;
#include <algorithm>
#include <sstream>
namespace ATC_matrix {
/**
* @class ParDenseMatrix
* @brief Parallelized version of DenseMatrix class.
*/
template <typename T>
class ParDenseMatrix : public DenseMatrix<T> {
public:
MPI_Comm _comm;
ParDenseMatrix(MPI_Comm comm, INDEX rows=0, INDEX cols=0, bool z=1)
: DenseMatrix<T>(rows, cols, z), _comm(comm) {}
ParDenseMatrix(MPI_Comm comm, const DenseMatrix<T>& c)
: DenseMatrix<T>(c), _comm(comm) {}
ParDenseMatrix(MPI_Comm comm, const SparseMatrix<T>& c)
: DenseMatrix<T>(c), _comm(comm) {}
ParDenseMatrix(MPI_Comm comm, const Matrix<T>& c)
: DenseMatrix<T>(c), _comm(comm) {}
//////////////////////////////////////////////////////////////////////////////
//* performs a matrix-vector multiply
void ParMultMv(const Vector<T> &v,
DenseVector<T> &c, const bool At, T a, T b)
{
// We can't generically support parallel multiplication because the data
// types must be specified when using MPI
MultMv(*this, v, c, At, a, b);
}
};
template<>
class ParDenseMatrix<double> : public DenseMatrix<double> {
public:
MPI_Comm _comm;
ParDenseMatrix(MPI_Comm comm, INDEX rows=0, INDEX cols=0, bool z=1)
: DenseMatrix<double>(rows, cols, z), _comm(comm) {}
ParDenseMatrix(MPI_Comm comm, const DenseMatrix<double>& c)
: DenseMatrix<double>(c), _comm(comm) {}
ParDenseMatrix(MPI_Comm comm, const SparseMatrix<double>& c)
: DenseMatrix<double>(c), _comm(comm) {}
ParDenseMatrix(MPI_Comm comm, const Matrix<double>& c)
: DenseMatrix<double>(c), _comm(comm) {}
void ParMultMv(const Vector<double> &v, DenseVector<double> &c,
const bool At, double a, double b) const
{
// We don't support parallel vec-Mat multiplication yet
if (At) {
MultMv(*this, v, c, At, a, b);
return;
}
const INDEX nRows = this->nRows();
const INDEX nCols = this->nCols();
if (c.size() != nRows) {
c.resize(nRows); // set size of C
c.zero(); // do not add result to C
} else c *= b;
// Determine how many rows will be handled on each processor
int nProcs = MPI_Wrappers::size(_comm);
int myRank = MPI_Wrappers::rank(_comm);
int *majorCounts = new int[nProcs];
int *offsets = new int[nProcs];
#ifdef COL_STORAGE // Column-major storage
int nMajor = nCols;
int nMinor = nRows;
int ParDenseMatrix::*majorField = &ParDenseMatrix::_nCols;
int ParDenseMatrix::*minorField = &ParDenseMatrix::_nRows;
#else // Row-major storage
int nMajor = nRows;
int nMinor = nCols;
int ParDenseMatrix::*majorField = &ParDenseMatrix::_nRows;
int ParDenseMatrix::*minorField = &ParDenseMatrix::_nCols;
#endif
for (int i = 0; i < nProcs; i++) {
// If we have an uneven row-or-col/processors number, or too few rows
// or cols, some processors will need to receive fewer rows/cols.
offsets[i] = (i * nMajor) / nProcs;
majorCounts[i] = (((i + 1) * nMajor) / nProcs) - offsets[i];
}
int myNMajor = majorCounts[myRank];
int myMajorOffset = offsets[myRank];
// Take data from an offset version of A
ParDenseMatrix<double> A_local(_comm);
A_local._data = this->_data + myMajorOffset * nMinor;
A_local.*majorField = myNMajor;
A_local.*minorField = nMinor;
#ifdef COL_STORAGE // Column-major storage
// When splitting by columns, we split the vector as well, and sum the
// results.
DenseVector<double> v_local(myNMajor);
for (int i = 0; i < myNMajor; i++)
v_local(i) = v(myMajorOffset + i);
// Store results in a local vector
DenseVector<double> c_local = A_local * v_local;
// Sum all vectors onto each processor
MPI_Wrappers::allsum(_comm, c_local.ptr(), c.ptr(), c_local.size());
#else // Row-major storage
// When splitting by rows, we use the whole vector and concatenate the
// results.
// Store results in a small local vector
DenseVector<double> c_local(myNMajor);
for (int i = 0; i < myNMajor; i++)
c_local(i) = c(myMajorOffset + i);
MultMv(A_local, v, c_local, At, a, b);
// Gather the results onto each processor
allgatherv(_comm, c_local.ptr(), c_local.size(), c.ptr(),
majorCounts, offsets);
#endif
// Clear out the local matrix's pointer so we don't double-free
A_local._data = NULL;
delete [] majorCounts;
delete [] offsets;
}
};
// Operator for dense Matrix - dense vector product
template<typename T>
DenseVector<T> operator*(const ParDenseMatrix<T> &A, const Vector<T> &b)
{
DenseVector<T> c;
A.ParMultMv(b, c, 0, 1.0, 0.0);
return c;
}
} // end namespace
#endif