lammps/lib/gpu/cudpp_mini/cudpp_plan.cpp

460 lines
15 KiB
C++

// -------------------------------------------------------------
// cuDPP -- CUDA Data Parallel Primitives library
// -------------------------------------------------------------
// $Revision: 3572$
// $Date: 2007-11-19 13:58:06 +0000 (Mon, 19 Nov 2007) $
// -------------------------------------------------------------
// This source code is distributed under the terms of license.txt
// in the root directory of this source distribution.
// -------------------------------------------------------------
#include "cudpp.h"
#include "cudpp_plan_manager.h"
#include "cudpp_scan.h"
//#include "cudpp_segscan.h"
//#include "cudpp_compact.h"
//#include "cudpp_spmvmult.h"
#include "cudpp_radixsort.h"
#include <cassert>
CUDPPPlanManager* CUDPPPlanManager::m_instance = NULL;
CUDPPResult validateOptions(CUDPPConfiguration config, size_t /*numElements*/, size_t numRows, size_t /*rowPitch*/)
{
CUDPPResult ret = CUDPP_SUCCESS;
if ((config.options & CUDPP_OPTION_BACKWARD) && (config.options & CUDPP_OPTION_FORWARD))
ret = CUDPP_ERROR_ILLEGAL_CONFIGURATION;
if ((config.options & CUDPP_OPTION_EXCLUSIVE) && (config.options & CUDPP_OPTION_INCLUSIVE))
ret = CUDPP_ERROR_ILLEGAL_CONFIGURATION;
if (config.algorithm == CUDPP_COMPACT && numRows > 1)
ret = CUDPP_ERROR_ILLEGAL_CONFIGURATION; //!< @todo: add support for multi-row cudppCompact
return ret;
}
/** @addtogroup publicInterface
* @{
*/
/** @name Plan Interface
* @{
*/
/** @brief Create a CUDPP plan
*
* A plan is a data structure containing state and intermediate storage space
* that CUDPP uses to execute algorithms on data. A plan is created by
* passing to cudppPlan() a CUDPPConfiguration that specifies the algorithm,
* operator, datatype, and options. The size of the data must also be passed
* to cudppPlan(), in the \a numElements, \a numRows, and \a rowPitch
* arguments. These sizes are used to allocate internal storage space at the
* time the plan is created. The CUDPP planner may use the sizes, options,
* and information about the present hardware to choose optimal settings.
*
* Note that \a numElements is the maximum size of the array to be processed
* with this plan. That means that a plan may be re-used to process (for
* example, to sort or scan) smaller arrays.
*
* @param[out] planHandle A pointer to an opaque handle to the internal plan
* @param[in] config The configuration struct specifying algorithm and options
* @param[in] numElements The maximum number of elements to be processed
* @param[in] numRows The number of rows (for 2D operations) to be processed
* @param[in] rowPitch The pitch of the rows of input data, in elements
*/
CUDPP_DLL
CUDPPResult cudppPlan(CUDPPHandle *planHandle,
CUDPPConfiguration config,
size_t numElements,
size_t numRows,
size_t rowPitch)
{
CUDPPResult result = CUDPP_SUCCESS;
CUDPPPlan *plan;
result = validateOptions(config, numElements, numRows, rowPitch);
if (result != CUDPP_SUCCESS)
{
*planHandle = CUDPP_INVALID_HANDLE;
return result;
}
switch (config.algorithm)
{
case CUDPP_SCAN:
{
plan = new CUDPPScanPlan(config, numElements, numRows, rowPitch);
break;
}
// case CUDPP_COMPACT:
// {
// plan = new CUDPPCompactPlan(config, numElements, numRows, rowPitch);
// break;
// }
case CUDPP_SORT_RADIX:
//case CUDPP_SORT_RADIX_GLOBAL:
{
plan = new CUDPPRadixSortPlan(config, numElements);
break;
}
/* case CUDPP_SEGMENTED_SCAN:
{
plan = new CUDPPSegmentedScanPlan(config, numElements);
break;
}
//new rand plan
case CUDPP_RAND_MD5:
{
plan = new CUDPPRandPlan(config, numElements);
break;
}
case CUDPP_REDUCE:*/
default:
//! @todo: implement cudppReduce()
return CUDPP_ERROR_ILLEGAL_CONFIGURATION;
break;
}
*planHandle = CUDPPPlanManager::AddPlan(plan);
if (CUDPP_INVALID_HANDLE == *planHandle)
return CUDPP_ERROR_UNKNOWN;
else
return CUDPP_SUCCESS;
}
/** @brief Destroy a CUDPP Plan
*
* Deletes the plan referred to by \a planHandle and all associated internal
* storage.
*
* @param[in] planHandle The CUDPPHandle to the plan to be destroyed
*/
CUDPP_DLL
CUDPPResult cudppDestroyPlan(CUDPPHandle planHandle)
{
if (CUDPPPlanManager::RemovePlan(planHandle) == false)
return CUDPP_ERROR_INVALID_HANDLE;
else
return CUDPP_SUCCESS;
}
/** @brief Create a CUDPP Sparse Matrix Object
*
* The sparse matrix plan is a data structure containing state and intermediate storage space
* that CUDPP uses to perform sparse matrix dense vector multiply. This plan is created by
* passing to CUDPPSparseMatrixVectorMultiplyPlan() a CUDPPConfiguration that specifies the
* algorithm (sprarse matrix-dense vector multiply) and datatype, along with the sparse matrix
* itself in CSR format. The number of non-zero elements in the sparse matrix must also be passed
* as \a numNonZeroElements. This is used to allocate internal storage space at the time the
* sparse matrix plan is created.
*
* @param[out] sparseMatrixHandle A pointer to an opaque handle to the sparse matrix object
* @param[in] config The configuration struct specifying algorithm and options
* @param[in] numNonZeroElements The number of non zero elements in the sparse matrix
* @param[in] numRows This is the number of rows in y, x and A for y = A * x
* @param[in] A The matrix data
* @param[in] h_rowIndices An array containing the index of the start of each row in \a A
* @param[in] h_indices An array containing the index of each nonzero element in \a A
CUDPP_DLL
CUDPPResult cudppSparseMatrix(CUDPPHandle *sparseMatrixHandle,
CUDPPConfiguration config,
size_t numNonZeroElements,
size_t numRows,
const void *A,
const unsigned int *h_rowIndices,
const unsigned int *h_indices)
{
CUDPPResult result = CUDPP_SUCCESS;
CUDPPPlan *sparseMatrix;
if ((config.algorithm != CUDPP_SPMVMULT) ||
(numNonZeroElements <= 0) || (numRows <= 0))
{
result = CUDPP_ERROR_ILLEGAL_CONFIGURATION;
}
if (result != CUDPP_SUCCESS)
{
*sparseMatrixHandle = CUDPP_INVALID_HANDLE;
return result;
}
sparseMatrix =
new CUDPPSparseMatrixVectorMultiplyPlan(config, numNonZeroElements, A,
h_rowIndices, h_indices, numRows);
*sparseMatrixHandle = CUDPPPlanManager::AddPlan(sparseMatrix);
if (CUDPP_INVALID_HANDLE == *sparseMatrixHandle)
return CUDPP_ERROR_UNKNOWN;
else
return CUDPP_SUCCESS;
}
*/
/** @brief Destroy a CUDPP Sparse Matrix Object
*
* Deletes the sparse matrix data and plan referred to by \a sparseMatrixHandle
* and all associated internal storage.
*
* @param[in] sparseMatrixHandle The CUDPPHandle to the matrix object to be destroyed
CUDPP_DLL
CUDPPResult cudppDestroySparseMatrix(CUDPPHandle sparseMatrixHandle)
{
return cudppDestroyPlan(sparseMatrixHandle);
}
*/
/** @} */ // end Plan Interface
/** @} */ // end publicInterface
/** @brief Plan base class constructor
*
* @param[in] config The configuration struct specifying algorithm and options
* @param[in] numElements The maximum number of elements to be processed
* @param[in] numRows The number of rows (for 2D operations) to be processed
* @param[in] rowPitch The pitch of the rows of input data, in elements
*/
CUDPPPlan::CUDPPPlan(CUDPPConfiguration config,
size_t numElements,
size_t numRows,
size_t rowPitch)
: m_config(config),
m_numElements(numElements),
m_numRows(numRows),
m_rowPitch(rowPitch)
{
}
/** @brief Scan Plan constructor
*
* @param[in] config The configuration struct specifying algorithm and options
* @param[in] numElements The maximum number of elements to be scanned
* @param[in] numRows The maximum number of rows (for 2D operations) to be scanned
* @param[in] rowPitch The pitch of the rows of input data, in elements
*/
CUDPPScanPlan::CUDPPScanPlan(CUDPPConfiguration config,
size_t numElements,
size_t numRows,
size_t rowPitch)
: CUDPPPlan(config, numElements, numRows, rowPitch),
m_blockSums(0),
m_rowPitches(0),
m_numEltsAllocated(0),
m_numRowsAllocated(0),
m_numLevelsAllocated(0)
{
allocScanStorage(this);
}
/** @brief CUDPP scan plan destructor */
CUDPPScanPlan::~CUDPPScanPlan()
{
freeScanStorage(this);
}
/** @brief SegmentedScan Plan constructor
*
* @param[in] config The configuration struct specifying options
* @param[in] numElements The maximum number of elements to be scanned
CUDPPSegmentedScanPlan::CUDPPSegmentedScanPlan(CUDPPConfiguration config,
size_t numElements)
: CUDPPPlan(config, numElements, 1, 0),
m_blockSums(0),
m_blockFlags(0),
m_blockIndices(0),
m_numEltsAllocated(0),
m_numLevelsAllocated(0)
{
allocSegmentedScanStorage(this);
}
*/
/** @brief SegmentedScan plan destructor
CUDPPSegmentedScanPlan::~CUDPPSegmentedScanPlan()
{
freeSegmentedScanStorage(this);
}
*/
/** @brief Compact Plan constructor
*
* @param[in] config The configuration struct specifying options
* @param[in] numElements The maximum number of elements to be compacted
* @param[in] numRows The number of rows (for 2D operations) to be compacted
* @param[in] rowPitch The pitch of the rows of input data, in elements
CUDPPCompactPlan::CUDPPCompactPlan(CUDPPConfiguration config,
size_t numElements,
size_t numRows,
size_t rowPitch)
: CUDPPPlan(config, numElements, numRows, rowPitch),
m_d_outputIndices(0)
{
assert(numRows == 1); //!< @todo Add support for multirow compaction
CUDPPConfiguration scanConfig =
{
CUDPP_SCAN,
CUDPP_ADD,
CUDPP_UINT,
(config.options & CUDPP_OPTION_BACKWARD) ?
CUDPP_OPTION_BACKWARD | CUDPP_OPTION_EXCLUSIVE :
CUDPP_OPTION_FORWARD | CUDPP_OPTION_EXCLUSIVE
};
m_scanPlan = new CUDPPScanPlan(scanConfig, numElements, numRows, rowPitch);
allocCompactStorage(this);
}
*/
/** @brief Compact plan destructor
CUDPPCompactPlan::~CUDPPCompactPlan()
{
delete m_scanPlan;
freeCompactStorage(this);
}
*/
/** @brief Sort Plan constructor
*
* @param[in] config The configuration struct specifying algorithm and options
* @param[in] numElements The maximum number of elements to be sorted
*/
/*CUDPPSortPlan::CUDPPSortPlan(CUDPPConfiguration config, size_t numElements)
: CUDPPPlan(config, numElements, 1, 0),
m_scanPlan(0),
m_d_temp(0),
m_d_tempAddress(0)
{
CUDPPConfiguration scanConfig =
{
CUDPP_SCAN,
CUDPP_ADD,
CUDPP_UINT,
CUDPP_OPTION_FORWARD | CUDPP_OPTION_EXCLUSIVE
};
//if (config.algorithm == CUDPP_SORT_RADIX_GLOBAL)
{
m_scanPlan = new CUDPPScanPlan(scanConfig, numElements, 1, 0);
}
allocSortStorage(this);
}*/
/** @brief Sort plan destructor */
/*CUDPPSortPlan::~CUDPPSortPlan()
{
delete m_scanPlan;
freeSortStorage(this);
}*/
CUDPPRadixSortPlan::CUDPPRadixSortPlan(CUDPPConfiguration config, size_t numElements)
: CUDPPPlan(config, numElements, 1, 0),
m_scanPlan(0),
m_tempKeys(0),
m_tempValues(0),
m_counters(0),
m_countersSum(0),
m_blockOffsets(0)
{
size_t numBlocks2 = ((numElements % (SORT_CTA_SIZE * 2)) == 0) ?
(numElements / (SORT_CTA_SIZE * 2)) : (numElements / (SORT_CTA_SIZE * 2) + 1);
CUDPPConfiguration scanConfig =
{
CUDPP_SCAN,
CUDPP_ADD,
CUDPP_UINT,
CUDPP_OPTION_FORWARD | CUDPP_OPTION_EXCLUSIVE
};
if(m_config.options == CUDPP_OPTION_KEYS_ONLY)
m_bKeysOnly = true;
else
m_bKeysOnly = false;
m_scanPlan = new CUDPPScanPlan(scanConfig, numBlocks2*16, 1, 0);
allocRadixSortStorage(this);
}
CUDPPRadixSortPlan::~CUDPPRadixSortPlan()
{
delete m_scanPlan;
freeRadixSortStorage(this);
}
/** @brief SparseMatrixVectorMultiply Plan constructor
*
* @param[in] config The configuration struct specifying options
* @param[in] numNonZeroElements The number of non-zero elements in sparse matrix
* @param[in] A Array of non-zero matrix elements
* @param[in] rowIndex Array of indices of the first element of each row
* in the "flattened" version of the sparse matrix
* @param[in] index Array of indices of non-zero elements in the matrix
* @param[in] numRows The number of rows in the sparse matrix
CUDPPSparseMatrixVectorMultiplyPlan::CUDPPSparseMatrixVectorMultiplyPlan(
CUDPPConfiguration config,
size_t numNonZeroElements,
const void *A,
const unsigned int *rowIndex,
const unsigned int *index,
size_t numRows
)
: CUDPPPlan(config, numNonZeroElements, 1, 0),
m_segmentedScanPlan(0),
m_d_prod(0),
m_d_flags(0),
m_d_rowFinalIndex(0),
m_rowFinalIndex(0),
m_numRows(numRows),
m_numNonZeroElements(numNonZeroElements)
{
CUDPPConfiguration segScanConfig =
{
CUDPP_SEGMENTED_SCAN,
CUDPP_ADD,
config.datatype,
(CUDPP_OPTION_FORWARD | CUDPP_OPTION_INCLUSIVE)
};
m_segmentedScanPlan = new CUDPPSegmentedScanPlan(segScanConfig, m_numNonZeroElements);
// Generate an array of the indices of the last element of each row
// in the "flattened" version of the sparse matrix
m_rowFinalIndex = new unsigned int [m_numRows];
for (unsigned int i=0; i < m_numRows; ++i)
{
if (i < m_numRows-1)
m_rowFinalIndex[i] = rowIndex[i+1];
else
m_rowFinalIndex[i] = (unsigned int)numNonZeroElements;
}
allocSparseMatrixVectorMultiplyStorage(this, A, rowIndex, index);
}
*/
/** @brief Sparse matrix-vector plan destructor
CUDPPSparseMatrixVectorMultiplyPlan::~CUDPPSparseMatrixVectorMultiplyPlan()
{
freeSparseMatrixVectorMultiplyStorage(this);
delete m_segmentedScanPlan;
delete [] m_rowFinalIndex;
}
*/
/** @brief CUDPP Rand Plan Constructor
* @param[in] config The configuration struct specifying options
* @param[in] num_elements The number of elements to generate random bits for
CUDPPRandPlan::CUDPPRandPlan(CUDPPConfiguration config, size_t num_elements)
: CUDPPPlan(config, num_elements, 1, 0),
m_seed(0)
{
}
*/