Fix typos

parent 60864e38d1
commit 50935b5cbc

@@ -18,7 +18,7 @@ Then for every executable or library in your project:
 target_link_libraries(myTarget Kokkos::kokkos)
 ````
 That's it! There is no checking Kokkos preprocessor, compiler, or linker flags.
-Kokkos propagates all the necesssary flags to your project.
+Kokkos propagates all the necessary flags to your project.
 This means not only is linking to Kokkos easy, but Kokkos itself can actually configure compiler and linker flags for *your*
 project. If building in-tree, there is no `find_package` and you link with `target_link_libraries(kokkos)`.

@@ -103,7 +103,7 @@ endif()
 # Kokkos Keyword Listing
 
 ## Device Backends
-Device backends can be enabled by specifiying `-DKokkos_ENABLE_X`.
+Device backends can be enabled by specifying `-DKokkos_ENABLE_X`.
 
 * Kokkos_ENABLE_CUDA
     * Whether to build CUDA backend

@@ -122,7 +122,7 @@ Device backends can be enabled by specifiying `-DKokkos_ENABLE_X`.
     * BOOL Default: ON
 
 ## Enable Options
-Options can be enabled by specifiying `-DKokkos_ENABLE_X`.
+Options can be enabled by specifying `-DKokkos_ENABLE_X`.
 
 * Kokkos_ENABLE_AGGRESSIVE_VECTORIZATION
     * Whether to aggressively vectorize loops

@@ -225,7 +225,7 @@ The following options control `find_package` paths for CMake-based TPLs:
     * PATH Default:
 
 ## Architecture Keywords
-Architecture-specific optimizations can be enabled by specifiying `-DKokkos_ARCH_X`.
+Architecture-specific optimizations can be enabled by specifying `-DKokkos_ARCH_X`.
 
 * Kokkos_ARCH_AMDAVX
     * Whether to optimize for the AMDAVX architecture

@@ -31,7 +31,7 @@
 - OffsetView: Kokkos::OffsetView missing constructor which takes pointer [\#2247](https://github.com/kokkos/kokkos/issues/2247)
 - OffsetView: Kokkos::OffsetView: allow offset=0 [\#2246](https://github.com/kokkos/kokkos/issues/2246)
 - DeepCopy: Missing DeepCopy instrumentation in Kokkos [\#2522](https://github.com/kokkos/kokkos/issues/2522)
-- nvcc\_wrapper: --host-only fails with mutiple -W\* flags [\#2484](https://github.com/kokkos/kokkos/issues/2484)
+- nvcc\_wrapper: --host-only fails with multiple -W\* flags [\#2484](https://github.com/kokkos/kokkos/issues/2484)
 - nvcc\_wrapper: taking first -std option is counterintuitive [\#2553](https://github.com/kokkos/kokkos/issues/2553)
 - Subview: Error taking subviews of views with static_extents of min rank [\#2448](https://github.com/kokkos/kokkos/issues/2448)
 - TeamPolicy: reducers with valuetypes without += broken on CUDA [\#2410](https://github.com/kokkos/kokkos/issues/2410)

@@ -2,7 +2,7 @@
 
 # Sample script for benchmarking policy performance
 
-# Suggested enviroment variables to export prior to executing script:
+# Suggested environment variables to export prior to executing script:
 # KNL:
 # OMP_NUM_THREADS=256 KMP_AFFINITY=compact
 # Power:

@@ -383,7 +383,7 @@ fi
 # Check unknown arguments
 ################################################################################
 if [[ ${#UNKNOWN_ARGS[*]} > 0 ]]; then
-  echo "HPCBIND Uknown options: ${UNKNOWN_ARGS[*]}" > >(tee -a ${HPCBIND_LOG})
+  echo "HPCBIND Unknown options: ${UNKNOWN_ARGS[*]}" > >(tee -a ${HPCBIND_LOG})
   exit 1
 fi
 
@@ -81,7 +81,7 @@ add_library(B b.cpp)
 target_compile_features(B PUBLIC cxx_std_14)
 target_link_libraries(A B)
 ````
-I have requested two diferent features.
+I have requested two different features.
 CMake understands the requests and knows that `cxx_std_11` is a subset of `cxx_std_14`.
 CMake then picks C++14 for library `B`.
 CMake would not have been able to do feature resolution if we had directly done:

@@ -179,7 +179,7 @@ If you do not find the TPL where you expect it, you should error out rather than
 #### Finding TPLs
 
 If finding a TPL that is not a modern CMake project, refer to the `FindHWLOC.cmake` file in `cmake/Modules` for an example.
-You will ususally need to verify expected headers with `find_path`
+You will usually need to verify expected headers with `find_path`
 ````
 find_path(TPL_INCLUDE_DIR mytpl.h PATHS "${KOKKOS_MYTPL_DIR}/include")
 ````

@@ -100,7 +100,7 @@ class UnorderedMapInsertResult {
   KOKKOS_FORCEINLINE_FUNCTION
   bool existing() const { return (m_status & EXISTING); }
 
-  /// Did the map fail to insert the key due to insufficent capacity
+  /// Did the map fail to insert the key due to insufficient capacity
   KOKKOS_FORCEINLINE_FUNCTION
   bool failed() const { return m_index == UnorderedMapInvalidIndex; }
 
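For context, `UnorderedMapInsertResult` is what `UnorderedMap::insert` returns, and the flags documented above are typically consumed as in this minimal sketch (not part of the commit; `try_insert` is illustrative):

````
#include <Kokkos_UnorderedMap.hpp>

// Minimal sketch: how the three result flags are usually checked.
KOKKOS_INLINE_FUNCTION
void try_insert(Kokkos::UnorderedMap<int, double> map, int key, double value) {
  auto result = map.insert(key, value);
  if (result.success()) {
    // the key/value pair was newly inserted
  } else if (result.existing()) {
    // the key was already present; the index refers to the existing entry
  } else if (result.failed()) {
    // insufficient capacity: the caller must grow the map and retry
  }
}
````
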
@@ -109,7 +109,7 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
   void resize(size_t n, const Scalar& val) { assign(n, val); }
 
   void assign(size_t n, const Scalar& val) {
-    /* Resize if necessary (behavour of std:vector) */
+    /* Resize if necessary (behavior of std:vector) */
 
     if (n > span()) DV::resize(size_t(n * _extra_storage));
     _size = n;
@@ -234,7 +234,7 @@ class vector : public DualView<Scalar*, LayoutLeft, Arg1Type> {
 
   const_reference back() const { return DV::h_view(_size - 1); }
 
-  /* std::algorithms wich work originally with iterators, here they are
+  /* std::algorithms which work originally with iterators, here they are
    * implemented as member functions */
 
   size_t lower_bound(const size_t& start, const size_t& theEnd,

@@ -192,7 +192,7 @@ void test_bitset() {
 
     bitset_type bitset(test_sizes[i]);
 
-    // std::cout << " Check inital count " << std::endl;
+    // std::cout << " Check initial count " << std::endl;
     // nothing should be set
     {
       Impl::TestBitsetTest<bitset_type> f(bitset);

@@ -64,7 +64,7 @@ struct TestViewCtorProp_EmbeddedDim {
   using DynRankViewIntType = typename Kokkos::DynRankView<int, ExecSpace>;
   using DynRankViewDoubleType = typename Kokkos::DynRankView<double, ExecSpace>;
 
-  // Cuda 7.0 has issues with using a lamda in parallel_for to initialize the
+  // Cuda 7.0 has issues with using a lambda in parallel_for to initialize the
   // view - replace with this functor
   template <class ViewType>
   struct Functor {
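The workaround the comment describes — replacing a capturing lambda with an equivalent functor — looks roughly like this sketch (illustrative; `InitFunctor` and the extent are made up, not the test's actual functor):

````
#include <Kokkos_Core.hpp>

// Instead of capturing the view in a lambda, wrap the loop body in a
// callable struct that carries the view as a member.
template <class ViewType>
struct InitFunctor {
  ViewType v;
  InitFunctor(ViewType v_) : v(v_) {}
  KOKKOS_INLINE_FUNCTION
  void operator()(const int i) const { v(i) = i; }
};

void init_example() {
  Kokkos::View<int*> v("v", 100);
  Kokkos::parallel_for(100, InitFunctor<decltype(v)>(v));
}
````
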
@@ -113,10 +113,10 @@ __device__
 
 #define CUDA_SPACE_ATOMIC_MASK 0x1FFFF
 
-/// \brief Aquire a lock for the address
+/// \brief Acquire a lock for the address
 ///
-/// This function tries to aquire the lock for the hash value derived
-/// from the provided ptr. If the lock is successfully aquired the
+/// This function tries to acquire the lock for the hash value derived
+/// from the provided ptr. If the lock is successfully acquired the
 /// function returns true. Otherwise it returns false.
 __device__ inline bool lock_address_cuda_space(void* ptr) {
   size_t offset = size_t(ptr);

@@ -131,7 +131,7 @@ __device__ inline bool lock_address_cuda_space(void* ptr) {
 ///
 /// This function releases the lock for the hash value derived
 /// from the provided ptr. This function should only be called
-/// after previously successfully aquiring a lock with
+/// after previously successfully acquiring a lock with
 /// lock_address.
 __device__ inline void unlock_address_cuda_space(void* ptr) {
   size_t offset = size_t(ptr);

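These acquire/release comments document a protocol: keep retrying until the acquire succeeds, and release exactly once afterwards. A sketch of the intended call pattern, assuming the helpers are reachable as `Kokkos::Impl::*` as in the surrounding headers (`locked_update` is illustrative):

````
// Illustrative device-side caller (not from this diff). The done-flag
// loop is the usual shape for such per-address locks, so divergent
// threads in a warp keep retrying without deadlocking.
__device__ void locked_update(double* ptr) {
  bool done = false;
  while (!done) {
    // false means another thread holds the lock for this address's
    // hash bucket; try again.
    if (Kokkos::Impl::lock_address_cuda_space((void*)ptr)) {
      // ... perform the non-atomic read-modify-write on *ptr here ...
      // Release only after a successful acquire, as documented above.
      Kokkos::Impl::unlock_address_cuda_space((void*)ptr);
      done = true;
    }
  }
}
````
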
@@ -325,13 +325,13 @@ class TeamPolicyInternal<Kokkos::Cuda, Properties...>
         m_team_scratch_size{0, 0},
         m_thread_scratch_size{0, 0},
         m_chunk_size(32) {
-    // Make sure league size is permissable
+    // Make sure league size is permissible
     if (league_size_ >= int(Impl::cuda_internal_maximum_grid_count()))
       Impl::throw_runtime_exception(
           "Requested too large league_size for TeamPolicy on Cuda execution "
           "space.");
 
-    // Make sure total block size is permissable
+    // Make sure total block size is permissible
     if (m_team_size * m_vector_length > 1024) {
       Impl::throw_runtime_exception(
           std::string("Kokkos::TeamPolicy< Cuda > the team size is too large. "

@@ -351,7 +351,7 @@ class TeamPolicyInternal<Kokkos::Cuda, Properties...>
         m_team_scratch_size{0, 0},
         m_thread_scratch_size{0, 0},
         m_chunk_size(32) {
-    // Make sure league size is permissable
+    // Make sure league size is permissible
     if (league_size_ >= int(Impl::cuda_internal_maximum_grid_count()))
       Impl::throw_runtime_exception(
           "Requested too large league_size for TeamPolicy on Cuda execution "

@@ -367,13 +367,13 @@ class TeamPolicyInternal<Kokkos::Cuda, Properties...>
         m_team_scratch_size{0, 0},
         m_thread_scratch_size{0, 0},
         m_chunk_size(32) {
-    // Make sure league size is permissable
+    // Make sure league size is permissible
     if (league_size_ >= int(Impl::cuda_internal_maximum_grid_count()))
       Impl::throw_runtime_exception(
           "Requested too large league_size for TeamPolicy on Cuda execution "
           "space.");
 
-    // Make sure total block size is permissable
+    // Make sure total block size is permissible
     if (m_team_size * m_vector_length > 1024) {
       Impl::throw_runtime_exception(
           std::string("Kokkos::TeamPolicy< Cuda > the team size is too large. "

@@ -392,7 +392,7 @@ class TeamPolicyInternal<Kokkos::Cuda, Properties...>
         m_team_scratch_size{0, 0},
         m_thread_scratch_size{0, 0},
         m_chunk_size(32) {
-    // Make sure league size is permissable
+    // Make sure league size is permissible
     if (league_size_ >= int(Impl::cuda_internal_maximum_grid_count()))
      Impl::throw_runtime_exception(
          "Requested too large league_size for TeamPolicy on Cuda execution "

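The two checks repeated in these constructors enforce CUDA launch limits: the league size must fit the device grid, and a team, counting vector lanes, may not exceed 1024 threads per block. A minimal sketch of what does and does not pass the second check (sizes illustrative, assuming Kokkos is initialized):

````
#include <Kokkos_Core.hpp>

void policy_limits() {
  using policy_t = Kokkos::TeamPolicy<Kokkos::Cuda>;
  // 128 threads * 8 vector lanes = 1024, exactly at the block-size limit.
  policy_t ok(1000, 128, 8);
  // 256 * 8 = 2048 exceeds 1024; this constructor would throw
  // "Kokkos::TeamPolicy< Cuda > the team size is too large."
  // policy_t too_big(1000, 256, 8);
}
````
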
@@ -304,7 +304,7 @@ struct is_space {
       host_mirror_space;
 };
 
-// For backward compatiblity
+// For backward compatibility
 
 namespace Impl {
 
@@ -1450,7 +1450,7 @@ inline void deep_copy(
                     typename ViewType::value_type>::value,
                 "deep_copy requires non-const type");
 
-  // If contigous we can simply do a 1D flat loop
+  // If contiguous we can simply do a 1D flat loop
   if (dst.span_is_contiguous()) {
     typedef Kokkos::View<
         typename ViewType::value_type*, Kokkos::LayoutRight,

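The comment above describes deep_copy's internal fast path. The same idea can be sketched at user level: when `span_is_contiguous()` holds, the allocation can be aliased by a rank-1 unmanaged view and processed in a single flat loop (`fill_flat` is illustrative and assumes the view is accessible from the default execution space):

````
#include <Kokkos_Core.hpp>

// If a view's span is contiguous, alias its data with a rank-1
// unmanaged view and run one flat loop over the whole span.
template <class ViewType>
void fill_flat(ViewType dst, typename ViewType::value_type value) {
  if (dst.span_is_contiguous()) {
    Kokkos::View<typename ViewType::value_type*,
                 Kokkos::MemoryTraits<Kokkos::Unmanaged>>
        flat(dst.data(), dst.span());
    Kokkos::parallel_for(
        flat.extent(0), KOKKOS_LAMBDA(const int i) { flat(i) = value; });
  }
}
````
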
@@ -130,7 +130,7 @@ int* atomic_lock_array_cuda_space_ptr(bool deallocate = false);
 /// global memory.
 ///
 /// Team and Thread private scratch allocations in
-/// global memory are aquired via locks.
+/// global memory are acquired via locks.
 /// This function retrieves the lock array pointer.
 /// If the array is not yet allocated it will do so.
 int* scratch_lock_array_cuda_space_ptr(bool deallocate = false);

@@ -64,10 +64,10 @@ namespace Impl {
 /// This function initializes the locks to zero (unset).
 void init_lock_array_hbw_space();
 
-/// \brief Aquire a lock for the address
+/// \brief Acquire a lock for the address
 ///
-/// This function tries to aquire the lock for the hash value derived
-/// from the provided ptr. If the lock is successfully aquired the
+/// This function tries to acquire the lock for the hash value derived
+/// from the provided ptr. If the lock is successfully acquired the
 /// function returns true. Otherwise it returns false.
 bool lock_address_hbw_space(void* ptr);
 
@@ -75,7 +75,7 @@ bool lock_address_hbw_space(void* ptr);
 ///
 /// This function releases the lock for the hash value derived
 /// from the provided ptr. This function should only be called
-/// after previously successfully aquiring a lock with
+/// after previously successfully acquiring a lock with
 /// lock_address.
 void unlock_address_hbw_space(void* ptr);
 
@@ -74,10 +74,10 @@ namespace Impl {
 /// This function initializes the locks to zero (unset).
 void init_lock_array_host_space();
 
-/// \brief Aquire a lock for the address
+/// \brief Acquire a lock for the address
 ///
-/// This function tries to aquire the lock for the hash value derived
-/// from the provided ptr. If the lock is successfully aquired the
+/// This function tries to acquire the lock for the hash value derived
+/// from the provided ptr. If the lock is successfully acquired the
 /// function returns true. Otherwise it returns false.
 bool lock_address_host_space(void* ptr);
 
@@ -85,7 +85,7 @@ bool lock_address_host_space(void* ptr);
 ///
 /// This function releases the lock for the hash value derived
 /// from the provided ptr. This function should only be called
-/// after previously successfully aquiring a lock with
+/// after previously successfully acquiring a lock with
 /// lock_address.
 void unlock_address_host_space(void* ptr);
 
@@ -69,10 +69,10 @@ namespace Impl {
 /// This function initializes the locks to zero (unset).
 // void init_lock_array_host_space();
 
-/// \brief Aquire a lock for the address
+/// \brief Acquire a lock for the address
 ///
-/// This function tries to aquire the lock for the hash value derived
-/// from the provided ptr. If the lock is successfully aquired the
+/// This function tries to acquire the lock for the hash value derived
+/// from the provided ptr. If the lock is successfully acquired the
 /// function returns true. Otherwise it returns false.
 // bool lock_address_host_space(void* ptr);
 
@@ -80,7 +80,7 @@ namespace Impl {
 ///
 /// This function releases the lock for the hash value derived
 /// from the provided ptr. This function should only be called
-/// after previously successfully aquiring a lock with
+/// after previously successfully acquiring a lock with
 /// lock_address.
 // void unlock_address_host_space(void* ptr);
 
@@ -1169,7 +1169,7 @@ inline void parallel_reduce(const std::string& label, const size_t& policy,
 } // namespace Kokkos
 
 #ifdef KOKKOS_ENABLE_DEPRECATED_CODE
-// backwards compatiblity for Kokkos::Experimental reducers
+// backwards compatibility for Kokkos::Experimental reducers
 namespace Kokkos {
 namespace Experimental {
 using Kokkos::BAnd;

@@ -130,7 +130,7 @@ int* atomic_lock_array_rocm_space_ptr(bool deallocate = false);
 /// global memory.
 ///
 /// Team and Thread private scratch allocations in
-/// global memory are aquired via locks.
+/// global memory are acquired via locks.
 /// This function retrieves the lock array pointer.
 /// If the array is not yet allocated it will do so.
 int* scratch_lock_array_rocm_space_ptr(bool deallocate = false);

@@ -930,7 +930,7 @@ class ParallelFor<FunctorType, Kokkos::TeamPolicy<Properties...>,
 
     if (is_dynamic) {
       // Must synchronize to make sure each team has set its
-      // partition before begining the work stealing loop.
+      // partition before beginning the work stealing loop.
       if (data.pool_rendezvous()) data.pool_rendezvous_release();
     }
 
@@ -1077,7 +1077,7 @@ class ParallelReduce<FunctorType, Kokkos::TeamPolicy<Properties...>,
 
     if (is_dynamic) {
      // Must synchronize to make sure each team has set its
-      // partition before begining the work stealing loop.
+      // partition before beginning the work stealing loop.
      if (data.pool_rendezvous()) data.pool_rendezvous_release();
     }
 
@@ -611,7 +611,7 @@ void ThreadsExec::initialize(unsigned thread_count, unsigned use_numa_count,
 
       // Spawn thread executing the 'driver()' function.
      // Wait until spawned thread has attempted to initialize.
-      // If spawning and initialization is successfull then
+      // If spawning and initialization is successful then
      // an entry in 's_threads_exec' will be assigned.
      if (ThreadsExec::spawn()) {
        wait_yield(s_threads_process.m_pool_state, ThreadsExec::Inactive);

@@ -639,7 +639,7 @@ void ThreadsExec::initialize(unsigned thread_count, unsigned use_numa_count,
 
     if (!thread_spawn_failed) {
       // Bind process to the core on which it was located before spawning
-      // occured
+      // occurred
       if (hwloc_can_bind) {
         Kokkos::hwloc::bind_this_thread(proc_coord);
       }

@@ -52,7 +52,7 @@
 #include <sched.h>
 #include <errno.h>
 
-/* Standard C++ libaries */
+/* Standard C++ libraries */
 
 #include <cstdlib>
 #include <string>

@@ -153,7 +153,7 @@ void ThreadsExec::wait_yield(volatile int& flag, const int value) {
 #include <windows.h>
 #include <process.h>
 
-/* Standard C++ libaries */
+/* Standard C++ libraries */
 
 #include <cstdlib>
 #include <string>

@@ -70,7 +70,7 @@ namespace Impl {
 // called split_release
 //
 // The purporse of the split functions is to allow the last thread to arrive
-// an opprotunity to perform some actions before releasing the waiting threads
+// an opportunity to perform some actions before releasing the waiting threads
 //
 // If all threads have arrived (and split_release has been call if using
 // split_arrive) before a wait type call, the wait may return quickly

@@ -240,7 +240,7 @@ int HostThreadTeamData::get_work_stealing() noexcept {
       HostThreadTeamData *const *const pool =
           (HostThreadTeamData **)(m_pool_scratch + m_pool_members);
 
-      // Attempt from begining failed, try to steal from end of neighbor
+      // Attempt from beginning failed, try to steal from end of neighbor
 
       pair_int_t volatile *steal_range = &(pool[m_steal_rank]->m_work_range);
 
@@ -267,7 +267,7 @@ union SharedAllocationTracker {
  public:
   // Use macros instead of inline functions to reduce
   // pressure on compiler optimization by reducing
-  // number of symbols and inline functons.
+  // number of symbols and inline functions.
 
 #if defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST)
 
@@ -499,7 +499,7 @@ class RunnableTaskBase
   void acquire_predecessor_from(runnable_task_type& other) {
     KOKKOS_EXPECTS(m_predecessor == nullptr ||
                    other.m_predecessor == m_predecessor);
-    // since we're transfering, no need to modify the reference count
+    // since we're transferring, no need to modify the reference count
     m_predecessor = other.m_predecessor;
     other.m_predecessor = nullptr;
   }

@@ -508,7 +508,7 @@ class RunnableTaskBase
   void acquire_predecessor_from(runnable_task_type& other) volatile {
     KOKKOS_EXPECTS(m_predecessor == nullptr ||
                    other.m_predecessor == m_predecessor);
-    // since we're transfering, no need to modify the reference count
+    // since we're transferring, no need to modify the reference count
     m_predecessor = other.m_predecessor;
     other.m_predecessor = nullptr;
   }

@@ -597,7 +597,7 @@ KOKKOS_FUNCTION void TaskQueue<ExecSpace, MemorySpace>::complete(
   // If 'task' is an aggregate then any of the runnable tasks that
   // it depends upon may be attempting to complete this 'task'.
   // Must only transition a task once to complete status.
-  // This is controled by atomically locking the wait queue.
+  // This is controlled by atomically locking the wait queue.
 
   // Stop other tasks from adding themselves to this task's wait queue
   // by locking the head of this task's wait queue.

@@ -129,7 +129,7 @@ struct are_integral<T, Args...> {
   enum {
     value =
         // Accept std::is_integral OR std::is_enum as an integral value
-        // since a simple enum value is automically convertable to an
+        // since a simple enum value is automically convertible to an
         // integral value.
         (std::is_integral<T>::value || std::is_enum<T>::value) &&
         are_integral<Args...>::value

@@ -1218,7 +1218,7 @@ struct TestTeamBroadcast {
     // expected_result=%d,
     // total=%d\n",expected_result, total);
 
-    // team_broadcast with funtion object
+    // team_broadcast with function object
     total = 0;
 
     Kokkos::parallel_reduce(policy_type_f(league_size, team_size), functor,
@@ -1230,7 +1230,7 @@ struct TestTeamBroadcast {
       expected_result += val;
     }
     ASSERT_EQ(size_t(expected_result),
-              size_t(total)); // printf("team_broadcast with funtion object --
+              size_t(total)); // printf("team_broadcast with function object --
                               // expected_result=%d,
                               // total=%d\n",expected_result, total);
   }

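The overload this test exercises broadcasts a value from one team member, optionally passing it through a function object first. A minimal sketch of both forms (league and team sizes illustrative):

````
#include <Kokkos_Core.hpp>

void broadcast_example() {
  using policy = Kokkos::TeamPolicy<>;
  Kokkos::parallel_for(
      policy(10, Kokkos::AUTO),
      KOKKOS_LAMBDA(const policy::member_type& team) {
        int value = (team.team_rank() == 2) ? 42 : 0;
        // Every team member receives rank 2's value.
        team.team_broadcast(value, 2);
        // With a function object: rank 2's value is transformed by the
        // callable before all members receive it.
        team.team_broadcast([](int& v) { v *= 2; }, value, 2);
      });
}
````
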
@@ -605,7 +605,7 @@ struct functor_vec_single {
 
   KOKKOS_INLINE_FUNCTION
   void operator()(typename policy_type::member_type team) const {
-    // Warning: this test case intentionally violates permissable semantics.
+    // Warning: this test case intentionally violates permissible semantics.
     // It is not valid to get references to members of the enclosing region
     // inside a parallel_for and write to it.
     Scalar value = 0;

@@ -60,7 +60,7 @@ struct TestViewCtorProp_EmbeddedDim {
   using ViewIntType = typename Kokkos::View<int**, ExecSpace>;
   using ViewDoubleType = typename Kokkos::View<double*, ExecSpace>;
 
-  // Cuda 7.0 has issues with using a lamda in parallel_for to initialize the
+  // Cuda 7.0 has issues with using a lambda in parallel_for to initialize the
   // view - replace with this functor
   template <class ViewType>
   struct Functor {

@@ -56,7 +56,7 @@ namespace {
    the N-th fibonacci number as follows:
      - Each "task" or "work item" computes the i-th fibonacci number
      - If a task as (i < 2), it will record the known answer ahead of time.
-     - If a taks has (i >= 2), it will "spawn" two more tasks to compute
+     - If a task has (i >= 2), it will "spawn" two more tasks to compute
        the (i - 1) and (i - 2) fibonacci numbers.
    We do NOT do any de-duplication of these tasks.
    De-duplication would result in only (N - 2) tasks which must be run in

@@ -74,7 +74,7 @@ find cmake/kokkos -name KokkosTargets.cmake -exec grep -h INTERFACE_COMPILE_OPTI
 
 #-I flags and -std= flags are not part of CMake's compile options
 #that's fine, let's ignore thse below
-#redunant lines - tail the last one
+#redundant lines - tail the last one
 #awk print each on new line
 #grep out blank lines
 #grep out include flags

@@ -43,7 +43,7 @@ include $(KOKKOS_PATH)/Makefile.kokkos
 
 build: $(EXE)
 
-#for unit testing only, for best preformance with OpenMP 4.0 or better
+#for unit testing only, for best performance with OpenMP 4.0 or better
 test: $(EXE)
 	./$(EXE)
 
@@ -64,10 +64,10 @@ typedef Kokkos::HostSpace::execution_space DefaultHostType;
 // threads can grep their own. On CPU architectures the pool size is equal to
 // the thread number, on CUDA about 128k states are generated (enough to give
 // every potentially simultaneously running thread its own state). With a kernel
-// a thread is required to aquire a state from the pool and later return it. On
+// a thread is required to acquire a state from the pool and later return it. On
 // CPUs the Random number generator is deterministic if using the same number of
 // threads. On GPUs (i.e. using the CUDA backend it is not deterministic because
-// threads aquire states via atomics.
+// threads acquire states via atomics.
 
 // A Functor for generating uint64_t random numbers templated on the
 // GeneratorPool type

@@ -97,7 +97,7 @@ struct generate_random {
     for (int k = 0; k < samples; k++)
      vals(i * samples + k) = rand_gen.urand64();
 
-    // Give the state back, which will allow another thread to aquire it
+    // Give the state back, which will allow another thread to acquire it
     rand_pool.free_state(rand_gen);
   }
 };

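End to end, the acquire/use/release pattern these comments describe looks like the following sketch (pool type, seed, and sizes are illustrative):

````
#include <Kokkos_Core.hpp>
#include <Kokkos_Random.hpp>

void random_example() {
  Kokkos::Random_XorShift64_Pool<> rand_pool(/*seed=*/5374857);
  Kokkos::View<uint64_t*> vals("vals", 1000);
  Kokkos::parallel_for(
      vals.extent(0), KOKKOS_LAMBDA(const int i) {
        // Acquire a generator state from the pool (via atomics on GPUs)...
        auto rand_gen = rand_pool.get_state();
        vals(i) = rand_gen.urand64();
        // ...and give it back so another thread can acquire it.
        rand_pool.free_state(rand_gen);
      });
}
````
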