forked from lijiext/lammps
1174 lines
37 KiB
C++
1174 lines
37 KiB
C++
/*
|
|
//@HEADER
|
|
// ************************************************************************
|
|
//
|
|
// Kokkos v. 2.0
|
|
// Copyright (2014) Sandia Corporation
|
|
//
|
|
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
|
|
// the U.S. Government retains certain rights in this software.
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// 1. Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
//
|
|
// 2. Redistributions in binary form must reproduce the above copyright
|
|
// notice, this list of conditions and the following disclaimer in the
|
|
// documentation and/or other materials provided with the distribution.
|
|
//
|
|
// 3. Neither the name of the Corporation nor the names of the
|
|
// contributors may be used to endorse or promote products derived from
|
|
// this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
|
|
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
|
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
|
|
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
|
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
|
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
|
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
|
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
|
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
//
|
|
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
|
//
|
|
// ************************************************************************
|
|
//@HEADER
|
|
*/
|
|
|
|
#ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
|
|
#define KOKKOS_EXAMPLE_FENLFUNCTORS_HPP
|
|
|
|
#include <cstdio>
|
|
|
|
#include <iostream>
|
|
#include <fstream>
|
|
#include <iomanip>
|
|
#include <cstdlib>
|
|
#include <cmath>
|
|
#include <limits>
|
|
|
|
#include <Kokkos_Pair.hpp>
|
|
#include <Kokkos_UnorderedMap.hpp>
|
|
|
|
#include <impl/Kokkos_Timer.hpp>
|
|
|
|
#include <BoxElemFixture.hpp>
|
|
#include <HexElement.hpp>
|
|
#include <CGSolve.hpp>
|
|
|
|
//----------------------------------------------------------------------------
|
|
//----------------------------------------------------------------------------
|
|
|
|
namespace Kokkos {
|
|
namespace Example {
|
|
namespace FENL {
|
|
|
|
template< class ElemNodeIdView , class CrsGraphType , unsigned ElemNode >
|
|
class NodeNodeGraph {
|
|
public:
|
|
|
|
typedef typename ElemNodeIdView::execution_space execution_space ;
|
|
typedef pair<unsigned,unsigned> key_type ;
|
|
|
|
typedef Kokkos::UnorderedMap< key_type, void , execution_space > SetType ;
|
|
typedef typename CrsGraphType::row_map_type::non_const_type RowMapType ;
|
|
typedef Kokkos::View< unsigned , execution_space > UnsignedValue ;
|
|
|
|
// Static dimensions of 0 generate compiler warnings or errors.
|
|
typedef Kokkos::View< unsigned*[ElemNode][ElemNode] , execution_space >
|
|
ElemGraphType ;
|
|
|
|
struct TagFillNodeSet {};
|
|
struct TagScanNodeCount {};
|
|
struct TagFillGraphEntries {};
|
|
struct TagSortGraphEntries {};
|
|
struct TagFillElementGraph {};
|
|
|
|
private:
|
|
|
|
enum PhaseType { FILL_NODE_SET ,
|
|
SCAN_NODE_COUNT ,
|
|
FILL_GRAPH_ENTRIES ,
|
|
SORT_GRAPH_ENTRIES ,
|
|
FILL_ELEMENT_GRAPH };
|
|
|
|
const unsigned node_count ;
|
|
const ElemNodeIdView elem_node_id ;
|
|
UnsignedValue row_total ;
|
|
RowMapType row_count ;
|
|
RowMapType row_map ;
|
|
SetType node_node_set ;
|
|
PhaseType phase ;
|
|
|
|
public:
|
|
|
|
CrsGraphType graph ;
|
|
ElemGraphType elem_graph ;
|
|
|
|
struct Times
|
|
{
|
|
double ratio;
|
|
double fill_node_set;
|
|
double scan_node_count;
|
|
double fill_graph_entries;
|
|
double sort_graph_entries;
|
|
double fill_element_graph;
|
|
};
|
|
|
|
NodeNodeGraph( const ElemNodeIdView & arg_elem_node_id ,
|
|
const unsigned arg_node_count,
|
|
Times & results
|
|
)
|
|
: node_count(arg_node_count)
|
|
, elem_node_id( arg_elem_node_id )
|
|
, row_total( "row_total" )
|
|
, row_count(Kokkos::ViewAllocateWithoutInitializing("row_count") , node_count ) // will deep_copy to 0 inside loop
|
|
, row_map( "graph_row_map" , node_count + 1 )
|
|
, node_node_set()
|
|
, phase( FILL_NODE_SET )
|
|
, graph()
|
|
, elem_graph()
|
|
{
|
|
//--------------------------------
|
|
// Guess at capacity required for the map:
|
|
|
|
Kokkos::Timer wall_clock ;
|
|
|
|
wall_clock.reset();
|
|
phase = FILL_NODE_SET ;
|
|
|
|
// upper bound on the capacity
|
|
size_t set_capacity = (28ull * node_count) / 2;
|
|
unsigned failed_insert_count = 0 ;
|
|
|
|
do {
|
|
// Zero the row count to restart the fill
|
|
Kokkos::deep_copy( row_count , 0u );
|
|
|
|
node_node_set = SetType( ( set_capacity += failed_insert_count ) );
|
|
|
|
// May be larger that requested:
|
|
set_capacity = node_node_set.capacity();
|
|
|
|
Kokkos::parallel_reduce( Kokkos::RangePolicy<execution_space,TagFillNodeSet>(0,elem_node_id.extent(0))
|
|
, *this
|
|
, failed_insert_count );
|
|
|
|
} while ( failed_insert_count );
|
|
|
|
execution_space::fence();
|
|
results.ratio = (double)node_node_set.size() / (double)node_node_set.capacity();
|
|
results.fill_node_set = wall_clock.seconds();
|
|
//--------------------------------
|
|
|
|
wall_clock.reset();
|
|
phase = SCAN_NODE_COUNT ;
|
|
|
|
// Exclusive scan of row_count into row_map
|
|
// including the final total in the 'node_count + 1' position.
|
|
// Zero the 'row_count' values.
|
|
Kokkos::parallel_scan( node_count , *this );
|
|
|
|
// Zero the row count for the fill:
|
|
Kokkos::deep_copy( row_count , 0u );
|
|
|
|
unsigned graph_entry_count = 0 ;
|
|
|
|
Kokkos::deep_copy( graph_entry_count , row_total );
|
|
|
|
// Assign graph's row_map and allocate graph's entries
|
|
graph.row_map = row_map ;
|
|
graph.entries = typename CrsGraphType::entries_type( "graph_entries" , graph_entry_count );
|
|
|
|
//--------------------------------
|
|
// Fill graph's entries from the (node,node) set.
|
|
|
|
execution_space::fence();
|
|
results.scan_node_count = wall_clock.seconds();
|
|
|
|
wall_clock.reset();
|
|
phase = FILL_GRAPH_ENTRIES ;
|
|
Kokkos::parallel_for( node_node_set.capacity() , *this );
|
|
|
|
execution_space::fence();
|
|
results.fill_graph_entries = wall_clock.seconds();
|
|
|
|
//--------------------------------
|
|
// Done with the temporary sets and arrays
|
|
wall_clock.reset();
|
|
phase = SORT_GRAPH_ENTRIES ;
|
|
|
|
row_total = UnsignedValue();
|
|
row_count = RowMapType();
|
|
row_map = RowMapType();
|
|
node_node_set.clear();
|
|
|
|
//--------------------------------
|
|
|
|
Kokkos::parallel_for( node_count , *this );
|
|
|
|
execution_space::fence();
|
|
results.sort_graph_entries = wall_clock.seconds();
|
|
|
|
//--------------------------------
|
|
// Element-to-graph mapping:
|
|
wall_clock.reset();
|
|
phase = FILL_ELEMENT_GRAPH ;
|
|
elem_graph = ElemGraphType("elem_graph", elem_node_id.extent(0) );
|
|
Kokkos::parallel_for( elem_node_id.extent(0) , *this );
|
|
|
|
execution_space::fence();
|
|
results.fill_element_graph = wall_clock.seconds();
|
|
}
|
|
|
|
//------------------------------------
|
|
// parallel_for: create map and count row length
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( const TagFillNodeSet & , unsigned ielem , unsigned & count ) const
|
|
{
|
|
// Loop over element's (row_local_node,col_local_node) pairs:
|
|
for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) {
|
|
|
|
const unsigned row_node = elem_node_id( ielem , row_local_node );
|
|
|
|
for ( unsigned col_local_node = row_local_node ; col_local_node < elem_node_id.extent(1) ; ++col_local_node ) {
|
|
|
|
const unsigned col_node = elem_node_id( ielem , col_local_node );
|
|
|
|
// If either node is locally owned then insert the pair into the unordered map:
|
|
|
|
if ( row_node < row_count.extent(0) || col_node < row_count.extent(0) ) {
|
|
|
|
const key_type key = (row_node < col_node) ? make_pair( row_node, col_node ) : make_pair( col_node, row_node ) ;
|
|
|
|
const typename SetType::insert_result result = node_node_set.insert( key );
|
|
|
|
// A successfull insert: the first time this pair was added
|
|
if ( result.success() ) {
|
|
|
|
// If row node is owned then increment count
|
|
if ( row_node < row_count.extent(0) ) { atomic_fetch_add( & row_count( row_node ) , 1 ); }
|
|
|
|
// If column node is owned and not equal to row node then increment count
|
|
if ( col_node < row_count.extent(0) && col_node != row_node ) { atomic_fetch_add( & row_count( col_node ) , 1 ); }
|
|
}
|
|
else if ( result.failed() ) {
|
|
++count ;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void fill_graph_entries( const unsigned iset ) const
|
|
{
|
|
if ( node_node_set.valid_at(iset) ) {
|
|
// Add each entry to the graph entries.
|
|
|
|
const key_type key = node_node_set.key_at(iset) ;
|
|
const unsigned row_node = key.first ;
|
|
const unsigned col_node = key.second ;
|
|
|
|
if ( row_node < row_count.extent(0) ) {
|
|
const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , 1 );
|
|
graph.entries( offset ) = col_node ;
|
|
}
|
|
|
|
if ( col_node < row_count.extent(0) && col_node != row_node ) {
|
|
const unsigned offset = graph.row_map( col_node ) + atomic_fetch_add( & row_count( col_node ) , 1 );
|
|
graph.entries( offset ) = row_node ;
|
|
}
|
|
}
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void sort_graph_entries( const unsigned irow ) const
|
|
{
|
|
const unsigned row_beg = graph.row_map( irow );
|
|
const unsigned row_end = graph.row_map( irow + 1 );
|
|
for ( unsigned i = row_beg + 1 ; i < row_end ; ++i ) {
|
|
const unsigned col = graph.entries(i);
|
|
unsigned j = i ;
|
|
for ( ; row_beg < j && col < graph.entries(j-1) ; --j ) {
|
|
graph.entries(j) = graph.entries(j-1);
|
|
}
|
|
graph.entries(j) = col ;
|
|
}
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void fill_elem_graph_map( const unsigned ielem ) const
|
|
{
|
|
for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) {
|
|
|
|
const unsigned row_node = elem_node_id( ielem , row_local_node );
|
|
|
|
for ( unsigned col_local_node = 0 ; col_local_node < elem_node_id.extent(1) ; ++col_local_node ) {
|
|
|
|
const unsigned col_node = elem_node_id( ielem , col_local_node );
|
|
|
|
unsigned entry = ~0u ;
|
|
|
|
if ( row_node + 1 < graph.row_map.extent(0) ) {
|
|
|
|
const unsigned entry_end = graph.row_map( row_node + 1 );
|
|
|
|
entry = graph.row_map( row_node );
|
|
|
|
for ( ; entry < entry_end && graph.entries(entry) != col_node ; ++entry );
|
|
|
|
if ( entry == entry_end ) entry = ~0u ;
|
|
}
|
|
|
|
elem_graph( ielem , row_local_node , col_local_node ) = entry ;
|
|
}
|
|
}
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( const unsigned iwork ) const
|
|
{
|
|
/*
|
|
if ( phase == FILL_NODE_SET ) {
|
|
operator()( TagFillNodeSet() , iwork );
|
|
}
|
|
else */
|
|
if ( phase == FILL_GRAPH_ENTRIES ) {
|
|
fill_graph_entries( iwork );
|
|
}
|
|
else if ( phase == SORT_GRAPH_ENTRIES ) {
|
|
sort_graph_entries( iwork );
|
|
}
|
|
else if ( phase == FILL_ELEMENT_GRAPH ) {
|
|
fill_elem_graph_map( iwork );
|
|
}
|
|
}
|
|
|
|
//------------------------------------
|
|
// parallel_scan: row offsets
|
|
|
|
typedef unsigned value_type ;
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( const unsigned irow , unsigned & update , const bool final ) const
|
|
{
|
|
// exclusive scan
|
|
if ( final ) { row_map( irow ) = update ; }
|
|
|
|
update += row_count( irow );
|
|
|
|
if ( final ) {
|
|
if ( irow + 1 == row_count.extent(0) ) {
|
|
row_map( irow + 1 ) = update ;
|
|
row_total() = update ;
|
|
}
|
|
}
|
|
}
|
|
|
|
// For the reduce phase:
|
|
KOKKOS_INLINE_FUNCTION
|
|
void init( const TagFillNodeSet & , unsigned & update ) const { update = 0 ; }
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void join( const TagFillNodeSet &
|
|
, volatile unsigned & update
|
|
, volatile const unsigned & input ) const { update += input ; }
|
|
|
|
// For the scan phase::
|
|
KOKKOS_INLINE_FUNCTION
|
|
void init( unsigned & update ) const { update = 0 ; }
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void join( volatile unsigned & update
|
|
, volatile const unsigned & input ) const { update += input ; }
|
|
|
|
//------------------------------------
|
|
};
|
|
|
|
} /* namespace FENL */
|
|
} /* namespace Example */
|
|
} /* namespace Kokkos */
|
|
|
|
//----------------------------------------------------------------------------
|
|
|
|
namespace Kokkos {
|
|
namespace Example {
|
|
namespace FENL {
|
|
|
|
template< class ElemCompType >
|
|
class NodeElemGatherFill {
|
|
public:
|
|
|
|
typedef typename ElemCompType::execution_space execution_space ;
|
|
typedef typename ElemCompType::vector_type vector_type ;
|
|
typedef typename ElemCompType::sparse_matrix_type sparse_matrix_type ;
|
|
typedef typename ElemCompType::elem_node_type elem_node_type ;
|
|
typedef typename ElemCompType::elem_vectors_type elem_vectors_type ;
|
|
typedef typename ElemCompType::elem_matrices_type elem_matrices_type ;
|
|
typedef typename ElemCompType::elem_graph_type elem_graph_type ;
|
|
|
|
static const unsigned ElemNodeCount = ElemCompType::ElemNodeCount ;
|
|
|
|
//------------------------------------
|
|
|
|
private:
|
|
|
|
typedef Kokkos::StaticCrsGraph< unsigned[2] , execution_space > CrsGraphType ;
|
|
typedef typename CrsGraphType::row_map_type::non_const_type RowMapType ;
|
|
typedef Kokkos::View< unsigned , execution_space > UnsignedValue ;
|
|
|
|
enum PhaseType { FILL_NODE_COUNT ,
|
|
SCAN_NODE_COUNT ,
|
|
FILL_GRAPH_ENTRIES ,
|
|
SORT_GRAPH_ENTRIES ,
|
|
GATHER_FILL };
|
|
|
|
const elem_node_type elem_node_id ;
|
|
const elem_graph_type elem_graph ;
|
|
UnsignedValue row_total ;
|
|
RowMapType row_count ;
|
|
RowMapType row_map ;
|
|
CrsGraphType graph ;
|
|
vector_type residual ;
|
|
sparse_matrix_type jacobian ;
|
|
elem_vectors_type elem_residual ;
|
|
elem_matrices_type elem_jacobian ;
|
|
PhaseType phase ;
|
|
|
|
public:
|
|
|
|
NodeElemGatherFill()
|
|
: elem_node_id()
|
|
, elem_graph()
|
|
, row_total()
|
|
, row_count()
|
|
, row_map()
|
|
, graph()
|
|
, residual()
|
|
, jacobian()
|
|
, elem_residual()
|
|
, elem_jacobian()
|
|
, phase( FILL_NODE_COUNT )
|
|
{}
|
|
|
|
NodeElemGatherFill( const NodeElemGatherFill & rhs )
|
|
: elem_node_id( rhs.elem_node_id )
|
|
, elem_graph( rhs.elem_graph )
|
|
, row_total( rhs.row_total )
|
|
, row_count( rhs.row_count )
|
|
, row_map( rhs.row_map )
|
|
, graph( rhs.graph )
|
|
, residual( rhs.residual )
|
|
, jacobian( rhs.jacobian )
|
|
, elem_residual( rhs.elem_residual )
|
|
, elem_jacobian( rhs.elem_jacobian )
|
|
, phase( rhs.phase )
|
|
{}
|
|
|
|
NodeElemGatherFill( const elem_node_type & arg_elem_node_id ,
|
|
const elem_graph_type & arg_elem_graph ,
|
|
const vector_type & arg_residual ,
|
|
const sparse_matrix_type & arg_jacobian ,
|
|
const elem_vectors_type & arg_elem_residual ,
|
|
const elem_matrices_type & arg_elem_jacobian )
|
|
: elem_node_id( arg_elem_node_id )
|
|
, elem_graph( arg_elem_graph )
|
|
, row_total( "row_total" )
|
|
, row_count( "row_count" , arg_residual.extent(0) )
|
|
, row_map( "graph_row_map" , arg_residual.extent(0) + 1 )
|
|
, graph()
|
|
, residual( arg_residual )
|
|
, jacobian( arg_jacobian )
|
|
, elem_residual( arg_elem_residual )
|
|
, elem_jacobian( arg_elem_jacobian )
|
|
, phase( FILL_NODE_COUNT )
|
|
{
|
|
//--------------------------------
|
|
// Count node->element relations
|
|
|
|
phase = FILL_NODE_COUNT ;
|
|
|
|
Kokkos::parallel_for( elem_node_id.extent(0) , *this );
|
|
|
|
//--------------------------------
|
|
|
|
phase = SCAN_NODE_COUNT ;
|
|
|
|
// Exclusive scan of row_count into row_map
|
|
// including the final total in the 'node_count + 1' position.
|
|
// Zero the 'row_count' values.
|
|
Kokkos::parallel_scan( residual.extent(0) , *this );
|
|
|
|
// Zero the row count for the fill:
|
|
Kokkos::deep_copy( row_count , typename RowMapType::value_type(0) );
|
|
|
|
unsigned graph_entry_count = 0 ;
|
|
|
|
Kokkos::deep_copy( graph_entry_count , row_total );
|
|
|
|
// Assign graph's row_map and allocate graph's entries
|
|
graph.row_map = row_map ;
|
|
|
|
typedef typename CrsGraphType::entries_type graph_entries_type ;
|
|
|
|
graph.entries = graph_entries_type( "graph_entries" , graph_entry_count );
|
|
|
|
//--------------------------------
|
|
// Fill graph's entries from the (node,node) set.
|
|
|
|
phase = FILL_GRAPH_ENTRIES ;
|
|
|
|
Kokkos::deep_copy( row_count , 0u );
|
|
Kokkos::parallel_for( elem_node_id.extent(0) , *this );
|
|
|
|
execution_space::fence();
|
|
|
|
//--------------------------------
|
|
// Done with the temporary sets and arrays
|
|
|
|
row_total = UnsignedValue();
|
|
row_count = RowMapType();
|
|
row_map = RowMapType();
|
|
|
|
//--------------------------------
|
|
|
|
phase = SORT_GRAPH_ENTRIES ;
|
|
Kokkos::parallel_for( residual.extent(0) , *this );
|
|
|
|
execution_space::fence();
|
|
|
|
phase = GATHER_FILL ;
|
|
}
|
|
|
|
void apply() const
|
|
{
|
|
Kokkos::parallel_for( residual.extent(0) , *this );
|
|
}
|
|
|
|
//------------------------------------
|
|
//------------------------------------
|
|
// parallel_for: Count node->element pairs
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void fill_node_count( const unsigned ielem ) const
|
|
{
|
|
for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) {
|
|
|
|
const unsigned row_node = elem_node_id( ielem , row_local_node );
|
|
|
|
if ( row_node < row_count.extent(0) ) {
|
|
atomic_fetch_add( & row_count( row_node ) , 1 );
|
|
}
|
|
}
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void fill_graph_entries( const unsigned ielem ) const
|
|
{
|
|
for ( unsigned row_local_node = 0 ; row_local_node < elem_node_id.extent(1) ; ++row_local_node ) {
|
|
|
|
const unsigned row_node = elem_node_id( ielem , row_local_node );
|
|
|
|
if ( row_node < row_count.extent(0) ) {
|
|
|
|
const unsigned offset = graph.row_map( row_node ) + atomic_fetch_add( & row_count( row_node ) , 1 );
|
|
|
|
graph.entries( offset , 0 ) = ielem ;
|
|
graph.entries( offset , 1 ) = row_local_node ;
|
|
}
|
|
}
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void sort_graph_entries( const unsigned irow ) const
|
|
{
|
|
const unsigned row_beg = graph.row_map( irow );
|
|
const unsigned row_end = graph.row_map( irow + 1 );
|
|
for ( unsigned i = row_beg + 1 ; i < row_end ; ++i ) {
|
|
const unsigned elem = graph.entries(i,0);
|
|
const unsigned local = graph.entries(i,1);
|
|
unsigned j = i ;
|
|
for ( ; row_beg < j && elem < graph.entries(j-1,0) ; --j ) {
|
|
graph.entries(j,0) = graph.entries(j-1,0);
|
|
graph.entries(j,1) = graph.entries(j-1,1);
|
|
}
|
|
graph.entries(j,0) = elem ;
|
|
graph.entries(j,1) = local ;
|
|
}
|
|
}
|
|
|
|
//------------------------------------
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void gather_fill( const unsigned irow ) const
|
|
{
|
|
const unsigned node_elem_begin = graph.row_map(irow);
|
|
const unsigned node_elem_end = graph.row_map(irow+1);
|
|
|
|
// for each element that a node belongs to
|
|
|
|
for ( unsigned i = node_elem_begin ; i < node_elem_end ; i++ ) {
|
|
|
|
const unsigned elem_id = graph.entries( i, 0);
|
|
const unsigned row_index = graph.entries( i, 1);
|
|
|
|
residual(irow) += elem_residual(elem_id, row_index);
|
|
|
|
// for each node in a particular related element
|
|
// gather the contents of the element stiffness
|
|
// matrix that belong in irow
|
|
|
|
for ( unsigned j = 0 ; j < ElemNodeCount ; ++j ) {
|
|
const unsigned A_index = elem_graph( elem_id , row_index , j );
|
|
|
|
jacobian.coeff( A_index ) += elem_jacobian( elem_id, row_index, j );
|
|
}
|
|
}
|
|
}
|
|
|
|
//------------------------------------
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( const unsigned iwork ) const
|
|
{
|
|
if ( phase == FILL_NODE_COUNT ) {
|
|
fill_node_count( iwork );
|
|
}
|
|
else if ( phase == FILL_GRAPH_ENTRIES ) {
|
|
fill_graph_entries( iwork );
|
|
}
|
|
else if ( phase == SORT_GRAPH_ENTRIES ) {
|
|
sort_graph_entries( iwork );
|
|
}
|
|
else if ( phase == GATHER_FILL ) {
|
|
gather_fill( iwork );
|
|
}
|
|
}
|
|
|
|
//------------------------------------
|
|
// parallel_scan: row offsets
|
|
|
|
typedef unsigned value_type ;
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( const unsigned irow , unsigned & update , const bool final ) const
|
|
{
|
|
// exclusive scan
|
|
if ( final ) { row_map( irow ) = update ; }
|
|
|
|
update += row_count( irow );
|
|
|
|
if ( final ) {
|
|
if ( irow + 1 == row_count.extent(0) ) {
|
|
row_map( irow + 1 ) = update ;
|
|
row_total() = update ;
|
|
}
|
|
}
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void init( unsigned & update ) const { update = 0 ; }
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void join( volatile unsigned & update , const volatile unsigned & input ) const { update += input ; }
|
|
};
|
|
|
|
} /* namespace FENL */
|
|
} /* namespace Example */
|
|
} /* namespace Kokkos */
|
|
|
|
//----------------------------------------------------------------------------
|
|
|
|
namespace Kokkos {
|
|
namespace Example {
|
|
namespace FENL {
|
|
|
|
template< class FiniteElementMeshType , class SparseMatrixType >
|
|
class ElementComputation ;
|
|
|
|
|
|
template< class ExecSpace , BoxElemPart::ElemOrder Order , class CoordinateMap , typename ScalarType >
|
|
class ElementComputation<
|
|
Kokkos::Example::BoxElemFixture< ExecSpace , Order , CoordinateMap > ,
|
|
Kokkos::Example::CrsMatrix< ScalarType , ExecSpace > >
|
|
{
|
|
public:
|
|
|
|
typedef Kokkos::Example::BoxElemFixture< ExecSpace, Order, CoordinateMap > mesh_type ;
|
|
typedef Kokkos::Example::HexElement_Data< mesh_type::ElemNode > element_data_type ;
|
|
|
|
typedef Kokkos::Example::CrsMatrix< ScalarType , ExecSpace > sparse_matrix_type ;
|
|
typedef typename sparse_matrix_type::StaticCrsGraphType sparse_graph_type ;
|
|
|
|
typedef ExecSpace execution_space ;
|
|
typedef ScalarType scalar_type ;
|
|
|
|
static const unsigned SpatialDim = element_data_type::spatial_dimension ;
|
|
static const unsigned TensorDim = SpatialDim * SpatialDim ;
|
|
static const unsigned ElemNodeCount = element_data_type::element_node_count ;
|
|
static const unsigned FunctionCount = element_data_type::function_count ;
|
|
static const unsigned IntegrationCount = element_data_type::integration_count ;
|
|
|
|
//------------------------------------
|
|
|
|
typedef typename mesh_type::node_coord_type node_coord_type ;
|
|
typedef typename mesh_type::elem_node_type elem_node_type ;
|
|
typedef Kokkos::View< scalar_type*[FunctionCount][FunctionCount] , execution_space > elem_matrices_type ;
|
|
typedef Kokkos::View< scalar_type*[FunctionCount] , execution_space > elem_vectors_type ;
|
|
typedef Kokkos::View< scalar_type* , execution_space > vector_type ;
|
|
|
|
typedef typename NodeNodeGraph< elem_node_type , sparse_graph_type , ElemNodeCount >::ElemGraphType elem_graph_type ;
|
|
|
|
//------------------------------------
|
|
|
|
|
|
//------------------------------------
|
|
// Computational data:
|
|
|
|
const element_data_type elem_data ;
|
|
const elem_node_type elem_node_ids ;
|
|
const node_coord_type node_coords ;
|
|
const elem_graph_type elem_graph ;
|
|
const elem_matrices_type elem_jacobians ;
|
|
const elem_vectors_type elem_residuals ;
|
|
const vector_type solution ;
|
|
const vector_type residual ;
|
|
const sparse_matrix_type jacobian ;
|
|
const scalar_type coeff_K ;
|
|
|
|
ElementComputation( const ElementComputation & rhs )
|
|
: elem_data()
|
|
, elem_node_ids( rhs.elem_node_ids )
|
|
, node_coords( rhs.node_coords )
|
|
, elem_graph( rhs.elem_graph )
|
|
, elem_jacobians( rhs.elem_jacobians )
|
|
, elem_residuals( rhs.elem_residuals )
|
|
, solution( rhs.solution )
|
|
, residual( rhs.residual )
|
|
, jacobian( rhs.jacobian )
|
|
, coeff_K( rhs.coeff_K )
|
|
{}
|
|
|
|
// If the element->sparse_matrix graph is provided then perform atomic updates
|
|
// Otherwise fill per-element contributions for subequent gather-add into a residual and jacobian.
|
|
ElementComputation( const mesh_type & arg_mesh ,
|
|
const scalar_type arg_coeff_K ,
|
|
const vector_type & arg_solution ,
|
|
const elem_graph_type & arg_elem_graph ,
|
|
const sparse_matrix_type & arg_jacobian ,
|
|
const vector_type & arg_residual )
|
|
: elem_data()
|
|
, elem_node_ids( arg_mesh.elem_node() )
|
|
, node_coords( arg_mesh.node_coord() )
|
|
, elem_graph( arg_elem_graph )
|
|
, elem_jacobians()
|
|
, elem_residuals()
|
|
, solution( arg_solution )
|
|
, residual( arg_residual )
|
|
, jacobian( arg_jacobian )
|
|
, coeff_K( arg_coeff_K )
|
|
{}
|
|
|
|
ElementComputation( const mesh_type & arg_mesh ,
|
|
const scalar_type arg_coeff_K ,
|
|
const vector_type & arg_solution )
|
|
: elem_data()
|
|
, elem_node_ids( arg_mesh.elem_node() )
|
|
, node_coords( arg_mesh.node_coord() )
|
|
, elem_graph()
|
|
, elem_jacobians( "elem_jacobians" , arg_mesh.elem_count() )
|
|
, elem_residuals( "elem_residuals" , arg_mesh.elem_count() )
|
|
, solution( arg_solution )
|
|
, residual()
|
|
, jacobian()
|
|
, coeff_K( arg_coeff_K )
|
|
{}
|
|
|
|
//------------------------------------
|
|
|
|
void apply() const
|
|
{
|
|
parallel_for( elem_node_ids.extent(0) , *this );
|
|
}
|
|
|
|
//------------------------------------
|
|
|
|
static const unsigned FLOPS_transform_gradients =
|
|
/* Jacobian */ FunctionCount * TensorDim * 2 +
|
|
/* Inverse jacobian */ TensorDim * 6 + 6 +
|
|
/* Gradient transform */ FunctionCount * 15 ;
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
float transform_gradients(
|
|
const float grad[][ FunctionCount ] , // Gradient of bases master element
|
|
const double x[] ,
|
|
const double y[] ,
|
|
const double z[] ,
|
|
float dpsidx[] ,
|
|
float dpsidy[] ,
|
|
float dpsidz[] ) const
|
|
{
|
|
enum { j11 = 0 , j12 = 1 , j13 = 2 ,
|
|
j21 = 3 , j22 = 4 , j23 = 5 ,
|
|
j31 = 6 , j32 = 7 , j33 = 8 };
|
|
|
|
// Jacobian accumulation:
|
|
|
|
double J[ TensorDim ] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
|
|
|
for( unsigned i = 0; i < FunctionCount ; ++i ) {
|
|
const double x1 = x[i] ;
|
|
const double x2 = y[i] ;
|
|
const double x3 = z[i] ;
|
|
|
|
const float g1 = grad[0][i] ;
|
|
const float g2 = grad[1][i] ;
|
|
const float g3 = grad[2][i] ;
|
|
|
|
J[j11] += g1 * x1 ;
|
|
J[j12] += g1 * x2 ;
|
|
J[j13] += g1 * x3 ;
|
|
|
|
J[j21] += g2 * x1 ;
|
|
J[j22] += g2 * x2 ;
|
|
J[j23] += g2 * x3 ;
|
|
|
|
J[j31] += g3 * x1 ;
|
|
J[j32] += g3 * x2 ;
|
|
J[j33] += g3 * x3 ;
|
|
}
|
|
|
|
// Inverse jacobian:
|
|
|
|
float invJ[ TensorDim ] = {
|
|
static_cast<float>( J[j22] * J[j33] - J[j23] * J[j32] ) ,
|
|
static_cast<float>( J[j13] * J[j32] - J[j12] * J[j33] ) ,
|
|
static_cast<float>( J[j12] * J[j23] - J[j13] * J[j22] ) ,
|
|
|
|
static_cast<float>( J[j23] * J[j31] - J[j21] * J[j33] ) ,
|
|
static_cast<float>( J[j11] * J[j33] - J[j13] * J[j31] ) ,
|
|
static_cast<float>( J[j13] * J[j21] - J[j11] * J[j23] ) ,
|
|
|
|
static_cast<float>( J[j21] * J[j32] - J[j22] * J[j31] ) ,
|
|
static_cast<float>( J[j12] * J[j31] - J[j11] * J[j32] ) ,
|
|
static_cast<float>( J[j11] * J[j22] - J[j12] * J[j21] ) };
|
|
|
|
const float detJ = J[j11] * invJ[j11] +
|
|
J[j21] * invJ[j12] +
|
|
J[j31] * invJ[j13] ;
|
|
|
|
const float detJinv = 1.0 / detJ ;
|
|
|
|
for ( unsigned i = 0 ; i < TensorDim ; ++i ) { invJ[i] *= detJinv ; }
|
|
|
|
// Transform gradients:
|
|
|
|
for( unsigned i = 0; i < FunctionCount ; ++i ) {
|
|
const float g0 = grad[0][i];
|
|
const float g1 = grad[1][i];
|
|
const float g2 = grad[2][i];
|
|
|
|
dpsidx[i] = g0 * invJ[j11] + g1 * invJ[j12] + g2 * invJ[j13];
|
|
dpsidy[i] = g0 * invJ[j21] + g1 * invJ[j22] + g2 * invJ[j23];
|
|
dpsidz[i] = g0 * invJ[j31] + g1 * invJ[j32] + g2 * invJ[j33];
|
|
}
|
|
|
|
return detJ ;
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void contributeResidualJacobian(
|
|
const float coeff_k ,
|
|
const double dof_values[] ,
|
|
const float dpsidx[] ,
|
|
const float dpsidy[] ,
|
|
const float dpsidz[] ,
|
|
const float detJ ,
|
|
const float integ_weight ,
|
|
const float bases_vals[] ,
|
|
double elem_res[] ,
|
|
double elem_mat[][ FunctionCount ] ) const
|
|
{
|
|
double value_at_pt = 0 ;
|
|
double gradx_at_pt = 0 ;
|
|
double grady_at_pt = 0 ;
|
|
double gradz_at_pt = 0 ;
|
|
|
|
for ( unsigned m = 0 ; m < FunctionCount ; m++ ) {
|
|
value_at_pt += dof_values[m] * bases_vals[m] ;
|
|
gradx_at_pt += dof_values[m] * dpsidx[m] ;
|
|
grady_at_pt += dof_values[m] * dpsidy[m] ;
|
|
gradz_at_pt += dof_values[m] * dpsidz[m] ;
|
|
}
|
|
|
|
const scalar_type k_detJ_weight = coeff_k * detJ * integ_weight ;
|
|
const double res_val = value_at_pt * value_at_pt * detJ * integ_weight ;
|
|
const double mat_val = 2.0 * value_at_pt * detJ * integ_weight ;
|
|
|
|
// $$ R_i = \int_{\Omega} \nabla \phi_i \cdot (k \nabla T) + \phi_i T^2 d \Omega $$
|
|
// $$ J_{i,j} = \frac{\partial R_i}{\partial T_j} = \int_{\Omega} k \nabla \phi_i \cdot \nabla \phi_j + 2 \phi_i \phi_j T d \Omega $$
|
|
|
|
for ( unsigned m = 0; m < FunctionCount; ++m) {
|
|
double * const mat = elem_mat[m] ;
|
|
const float bases_val_m = bases_vals[m];
|
|
const float dpsidx_m = dpsidx[m] ;
|
|
const float dpsidy_m = dpsidy[m] ;
|
|
const float dpsidz_m = dpsidz[m] ;
|
|
|
|
elem_res[m] += k_detJ_weight * ( dpsidx_m * gradx_at_pt +
|
|
dpsidy_m * grady_at_pt +
|
|
dpsidz_m * gradz_at_pt ) +
|
|
res_val * bases_val_m ;
|
|
|
|
for( unsigned n = 0; n < FunctionCount; n++) {
|
|
|
|
mat[n] += k_detJ_weight * ( dpsidx_m * dpsidx[n] +
|
|
dpsidy_m * dpsidy[n] +
|
|
dpsidz_m * dpsidz[n] ) +
|
|
mat_val * bases_val_m * bases_vals[n];
|
|
}
|
|
}
|
|
}
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( const unsigned ielem ) const
|
|
{
|
|
// Gather nodal coordinates and solution vector:
|
|
|
|
double x[ FunctionCount ] ;
|
|
double y[ FunctionCount ] ;
|
|
double z[ FunctionCount ] ;
|
|
double val[ FunctionCount ] ;
|
|
unsigned node_index[ ElemNodeCount ];
|
|
|
|
for ( unsigned i = 0 ; i < ElemNodeCount ; ++i ) {
|
|
const unsigned ni = elem_node_ids( ielem , i );
|
|
|
|
node_index[i] = ni ;
|
|
|
|
x[i] = node_coords( ni , 0 );
|
|
y[i] = node_coords( ni , 1 );
|
|
z[i] = node_coords( ni , 2 );
|
|
|
|
val[i] = solution( ni );
|
|
}
|
|
|
|
|
|
double elem_vec[ FunctionCount ] ;
|
|
double elem_mat[ FunctionCount ][ FunctionCount ] ;
|
|
|
|
for( unsigned i = 0; i < FunctionCount ; i++ ) {
|
|
elem_vec[i] = 0 ;
|
|
for( unsigned j = 0; j < FunctionCount ; j++){
|
|
elem_mat[i][j] = 0 ;
|
|
}
|
|
}
|
|
|
|
|
|
for ( unsigned i = 0 ; i < IntegrationCount ; ++i ) {
|
|
float dpsidx[ FunctionCount ] ;
|
|
float dpsidy[ FunctionCount ] ;
|
|
float dpsidz[ FunctionCount ] ;
|
|
|
|
const float detJ =
|
|
transform_gradients( elem_data.gradients[i] , x , y , z ,
|
|
dpsidx , dpsidy , dpsidz );
|
|
|
|
contributeResidualJacobian( coeff_K ,
|
|
val , dpsidx , dpsidy , dpsidz ,
|
|
detJ ,
|
|
elem_data.weights[i] ,
|
|
elem_data.values[i] ,
|
|
elem_vec , elem_mat );
|
|
}
|
|
|
|
#if 0
|
|
|
|
if ( 1 == ielem ) {
|
|
printf("ElemResidual { %f %f %f %f %f %f %f %f }\n",
|
|
elem_vec[0], elem_vec[1], elem_vec[2], elem_vec[3],
|
|
elem_vec[4], elem_vec[5], elem_vec[6], elem_vec[7]);
|
|
|
|
printf("ElemJacobian {\n");
|
|
|
|
for ( unsigned j = 0 ; j < FunctionCount ; ++j ) {
|
|
printf(" { %f %f %f %f %f %f %f %f }\n",
|
|
elem_mat[j][0], elem_mat[j][1], elem_mat[j][2], elem_mat[j][3],
|
|
elem_mat[j][4], elem_mat[j][5], elem_mat[j][6], elem_mat[j][7]);
|
|
}
|
|
printf("}\n");
|
|
}
|
|
|
|
#endif
|
|
|
|
if ( ! residual.extent(0) ) {
|
|
for( unsigned i = 0; i < FunctionCount ; i++){
|
|
elem_residuals(ielem, i) = elem_vec[i] ;
|
|
for( unsigned j = 0; j < FunctionCount ; j++){
|
|
elem_jacobians(ielem, i, j) = elem_mat[i][j] ;
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
for( unsigned i = 0 ; i < FunctionCount ; i++ ) {
|
|
const unsigned row = node_index[i] ;
|
|
if ( row < residual.extent(0) ) {
|
|
atomic_fetch_add( & residual( row ) , elem_vec[i] );
|
|
|
|
for( unsigned j = 0 ; j < FunctionCount ; j++ ) {
|
|
const unsigned entry = elem_graph( ielem , i , j );
|
|
if ( entry != ~0u ) {
|
|
atomic_fetch_add( & jacobian.coeff( entry ) , elem_mat[i][j] );
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}; /* ElementComputation */
|
|
|
|
//----------------------------------------------------------------------------
|
|
|
|
template< class FixtureType , class SparseMatrixType >
|
|
class DirichletComputation ;
|
|
|
|
template< class ExecSpace , BoxElemPart::ElemOrder Order , class CoordinateMap , typename ScalarType >
|
|
class DirichletComputation<
|
|
Kokkos::Example::BoxElemFixture< ExecSpace , Order , CoordinateMap > ,
|
|
Kokkos::Example::CrsMatrix< ScalarType , ExecSpace > >
|
|
{
|
|
public:
|
|
|
|
typedef Kokkos::Example::BoxElemFixture< ExecSpace, Order, CoordinateMap > mesh_type ;
|
|
typedef typename mesh_type::node_coord_type node_coord_type ;
|
|
typedef typename node_coord_type::value_type scalar_coord_type ;
|
|
|
|
typedef Kokkos::Example::CrsMatrix< ScalarType , ExecSpace > sparse_matrix_type ;
|
|
typedef typename sparse_matrix_type::StaticCrsGraphType sparse_graph_type ;
|
|
|
|
typedef ExecSpace execution_space ;
|
|
typedef ScalarType scalar_type ;
|
|
|
|
//------------------------------------
|
|
|
|
typedef Kokkos::View< scalar_type* , execution_space > vector_type ;
|
|
|
|
//------------------------------------
|
|
// Computational data:
|
|
|
|
const node_coord_type node_coords ;
|
|
const vector_type solution ;
|
|
const sparse_matrix_type jacobian ;
|
|
const vector_type residual ;
|
|
const scalar_type bc_lower_value ;
|
|
const scalar_type bc_upper_value ;
|
|
const scalar_coord_type bc_lower_limit ;
|
|
const scalar_coord_type bc_upper_limit ;
|
|
const unsigned bc_plane ;
|
|
const unsigned node_count ;
|
|
bool init ;
|
|
|
|
|
|
DirichletComputation( const mesh_type & arg_mesh ,
|
|
const vector_type & arg_solution ,
|
|
const sparse_matrix_type & arg_jacobian ,
|
|
const vector_type & arg_residual ,
|
|
const unsigned arg_bc_plane ,
|
|
const scalar_type arg_bc_lower_value ,
|
|
const scalar_type arg_bc_upper_value )
|
|
: node_coords( arg_mesh.node_coord() )
|
|
, solution( arg_solution )
|
|
, jacobian( arg_jacobian )
|
|
, residual( arg_residual )
|
|
, bc_lower_value( arg_bc_lower_value )
|
|
, bc_upper_value( arg_bc_upper_value )
|
|
, bc_lower_limit( std::numeric_limits<scalar_coord_type>::epsilon() )
|
|
, bc_upper_limit( scalar_coord_type(1) - std::numeric_limits<scalar_coord_type>::epsilon() )
|
|
, bc_plane( arg_bc_plane )
|
|
, node_count( arg_mesh.node_count_owned() )
|
|
, init( false )
|
|
{
|
|
parallel_for( node_count , *this );
|
|
init = true ;
|
|
}
|
|
|
|
void apply() const
|
|
{
|
|
parallel_for( node_count , *this );
|
|
}
|
|
|
|
//------------------------------------
|
|
|
|
KOKKOS_INLINE_FUNCTION
|
|
void operator()( const unsigned inode ) const
|
|
{
|
|
// Apply dirichlet boundary condition on the Solution and Residual vectors.
|
|
// To maintain the symmetry of the original global stiffness matrix,
|
|
// zero out the columns that correspond to boundary conditions, and
|
|
// update the residual vector accordingly
|
|
|
|
const unsigned iBeg = jacobian.graph.row_map[inode];
|
|
const unsigned iEnd = jacobian.graph.row_map[inode+1];
|
|
|
|
const scalar_coord_type c = node_coords(inode,bc_plane);
|
|
const bool bc_lower = c <= bc_lower_limit ;
|
|
const bool bc_upper = bc_upper_limit <= c ;
|
|
|
|
if ( ! init ) {
|
|
solution(inode) = bc_lower ? bc_lower_value : (
|
|
bc_upper ? bc_upper_value : 0 );
|
|
}
|
|
else {
|
|
if ( bc_lower || bc_upper ) {
|
|
|
|
residual(inode) = 0 ;
|
|
|
|
// zero each value on the row, and leave a one
|
|
// on the diagonal
|
|
|
|
for( unsigned i = iBeg ; i < iEnd ; ++i ) {
|
|
jacobian.coeff(i) = int(inode) == int(jacobian.graph.entries(i)) ? 1 : 0 ;
|
|
}
|
|
}
|
|
else {
|
|
|
|
// Find any columns that are boundary conditions.
|
|
// Clear them and adjust the residual vector
|
|
|
|
for( unsigned i = iBeg ; i < iEnd ; ++i ) {
|
|
const unsigned cnode = jacobian.graph.entries(i) ;
|
|
const scalar_coord_type cc = node_coords(cnode,bc_plane);
|
|
|
|
if ( ( cc <= bc_lower_limit ) || ( bc_upper_limit <= cc ) ) {
|
|
jacobian.coeff(i) = 0 ;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
};
|
|
|
|
} /* namespace FENL */
|
|
} /* namespace Example */
|
|
} /* namespace Kokkos */
|
|
|
|
//----------------------------------------------------------------------------
|
|
|
|
/* A Cuda-specific specialization for the element computation functor. */
|
|
#if defined( __CUDACC__ )
|
|
// #include <NonlinearElement_Cuda.hpp>
|
|
#endif
|
|
|
|
//----------------------------------------------------------------------------
|
|
|
|
#endif /* #ifndef KOKKOS_EXAMPLE_FENLFUNCTORS_HPP */
|
|
|