From 5c1d17d1c031f78fbe08d0500bb3396087464b4b Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 8 Jun 2017 10:42:08 -0600 Subject: [PATCH] Updating Kokkos lib to v2.03.05 --- lib/kokkos/CHANGELOG.md | 48 + lib/kokkos/CMakeLists.txt | 46 +- lib/kokkos/Makefile.kokkos | 360 +- lib/kokkos/Makefile.targets | 13 +- .../algorithms/src/KokkosAlgorithms_dummy.cpp | 1 + lib/kokkos/algorithms/src/Kokkos_Random.hpp | 6 +- lib/kokkos/algorithms/unit_tests/Makefile | 19 +- lib/kokkos/algorithms/unit_tests/TestCuda.cpp | 18 +- .../algorithms/unit_tests/TestOpenMP.cpp | 18 +- .../algorithms/unit_tests/TestRandom.hpp | 6 +- .../algorithms/unit_tests/TestSerial.cpp | 16 +- lib/kokkos/algorithms/unit_tests/TestSort.hpp | 21 +- .../algorithms/unit_tests/TestThreads.cpp | 16 +- .../benchmarks/bytes_and_flops/Makefile | 6 +- lib/kokkos/benchmarks/gather/Makefile | 8 +- lib/kokkos/cmake/KokkosConfig.cmake.in | 18 + lib/kokkos/cmake/Modules/FindHWLOC.cmake | 20 + lib/kokkos/cmake/Modules/FindMemkind.cmake | 20 + lib/kokkos/cmake/Modules/FindQthreads.cmake | 20 + lib/kokkos/cmake/kokkos.cmake | 1198 + .../kokkos-trilinos-integration-procedure.txt | 44 +- .../config/kokkos_dev/config-core-all.sh | 2 +- .../kokkos_dev/config-core-cuda-omp-hwloc.sh | 2 +- .../config/kokkos_dev/config-core-cuda.sh | 2 +- lib/kokkos/config/master_history.txt | 3 +- lib/kokkos/config/snapshot.py | 68 +- lib/kokkos/config/test_all_sandia | 17 +- .../testing_scripts/jenkins_test_driver | 2 +- .../config/trilinos-integration/checkin-test | 4 + .../prepare_trilinos_repos.sh | 25 +- .../shepard_jenkins_run_script_pthread_intel | 60 + .../shepard_jenkins_run_script_serial_intel | 60 + .../white_run_jenkins_script_cuda | 63 + .../white_run_jenkins_script_omp | 58 + .../containers/performance_tests/Makefile | 11 +- .../containers/performance_tests/TestCuda.cpp | 20 +- .../performance_tests/TestDynRankView.hpp | 9 +- .../performance_tests/TestOpenMP.cpp | 16 +- .../performance_tests/TestThreads.cpp | 16 +- 
lib/kokkos/containers/src/Kokkos_Bitset.hpp | 9 +- lib/kokkos/containers/src/Kokkos_DualView.hpp | 12 + .../containers/src/Kokkos_DynRankView.hpp | 289 +- .../containers/src/Kokkos_DynamicView.hpp | 10 +- .../containers/src/Kokkos_ErrorReporter.hpp | 11 +- .../containers/src/Kokkos_Functional.hpp | 9 +- .../containers/src/Kokkos_UnorderedMap.hpp | 11 +- lib/kokkos/containers/src/Kokkos_Vector.hpp | 9 +- .../src/impl/Kokkos_Bitset_impl.hpp | 2 +- .../src/impl/Kokkos_Functional_impl.hpp | 11 +- .../src/impl/Kokkos_UnorderedMap_impl.hpp | 11 +- lib/kokkos/containers/unit_tests/Makefile | 19 +- .../containers/unit_tests/TestComplex.hpp | 263 - lib/kokkos/containers/unit_tests/TestCuda.cpp | 16 +- .../containers/unit_tests/TestDualView.hpp | 9 +- .../containers/unit_tests/TestDynamicView.hpp | 9 +- .../unit_tests/TestErrorReporter.hpp | 9 +- .../containers/unit_tests/TestOpenMP.cpp | 23 +- .../containers/unit_tests/TestSerial.cpp | 24 +- .../unit_tests/TestStaticCrsGraph.hpp | 9 +- .../containers/unit_tests/TestThreads.cpp | 18 +- .../unit_tests/TestUnorderedMap.hpp | 9 +- .../containers/unit_tests/TestVector.hpp | 9 +- lib/kokkos/core/cmake/KokkosCore_config.h.in | 93 +- lib/kokkos/core/perf_test/CMakeLists.txt | 13 +- lib/kokkos/core/perf_test/Makefile | 53 +- lib/kokkos/core/perf_test/PerfTestCuda.cpp | 199 - lib/kokkos/core/perf_test/PerfTestDriver.hpp | 86 - ...ramSchmidt.hpp => PerfTestGramSchmidt.cpp} | 61 +- ...erfTestHexGrad.hpp => PerfTestHexGrad.cpp} | 57 + lib/kokkos/core/perf_test/PerfTestHost.cpp | 125 - lib/kokkos/core/perf_test/PerfTestMain.cpp | 31 +- .../PerfTest_Category.hpp} | 26 +- .../perf_test/PerfTest_CustomReduction.cpp | 115 + lib/kokkos/core/perf_test/run_mempool.sh | 25 + lib/kokkos/core/perf_test/run_mempool_fill.sh | 21 + lib/kokkos/core/perf_test/run_taskdag.sh | 21 + lib/kokkos/core/perf_test/test_mempool.cpp | 357 + lib/kokkos/core/perf_test/test_taskdag.cpp | 284 + .../src/Cuda/KokkosExp_Cuda_IterateTile.hpp | 11 +- 
lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp | 16 +- lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp | 15 +- .../core/src/Cuda/Kokkos_Cuda_Alloc.hpp | 12 +- .../core/src/Cuda/Kokkos_Cuda_Error.hpp | 11 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp | 41 +- .../core/src/Cuda/Kokkos_Cuda_Internal.hpp | 13 +- .../core/src/Cuda/Kokkos_Cuda_Parallel.hpp | 1012 +- .../core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp | 282 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp | 21 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp | 54 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp | 982 + .../src/Cuda/Kokkos_Cuda_Vectorization.hpp | 11 +- lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp | 10 +- .../core/src/Cuda/Kokkos_Cuda_abort.hpp | 12 +- .../core/src/KokkosExp_MDRangePolicy.hpp | 4 +- lib/kokkos/core/src/Kokkos_Complex.hpp | 124 +- lib/kokkos/core/src/Kokkos_Concepts.hpp | 9 +- lib/kokkos/core/src/Kokkos_Core.hpp | 10 +- lib/kokkos/core/src/Kokkos_Core_fwd.hpp | 33 +- lib/kokkos/core/src/Kokkos_Cuda.hpp | 12 +- lib/kokkos/core/src/Kokkos_CudaSpace.hpp | 15 +- lib/kokkos/core/src/Kokkos_ExecPolicy.hpp | 2 - lib/kokkos/core/src/Kokkos_HBWSpace.hpp | 11 +- lib/kokkos/core/src/Kokkos_HostSpace.hpp | 5 +- lib/kokkos/core/src/Kokkos_Layout.hpp | 18 +- lib/kokkos/core/src/Kokkos_Macros.hpp | 22 +- lib/kokkos/core/src/Kokkos_MemoryPool.hpp | 1997 +- lib/kokkos/core/src/Kokkos_NumericTraits.hpp | 217 + lib/kokkos/core/src/Kokkos_OpenMP.hpp | 34 +- lib/kokkos/core/src/Kokkos_OpenMPTarget.hpp | 186 + .../core/src/Kokkos_OpenMPTargetSpace.hpp | 265 + .../core/src/Kokkos_Parallel_Reduce.hpp | 1012 +- lib/kokkos/core/src/Kokkos_Qthreads.hpp | 9 +- lib/kokkos/core/src/Kokkos_ScratchSpace.hpp | 17 +- lib/kokkos/core/src/Kokkos_Serial.hpp | 11 +- lib/kokkos/core/src/Kokkos_TaskScheduler.hpp | 103 +- lib/kokkos/core/src/Kokkos_Threads.hpp | 10 +- lib/kokkos/core/src/Kokkos_Timer.hpp | 11 +- lib/kokkos/core/src/Kokkos_Vectorization.hpp | 9 +- lib/kokkos/core/src/Kokkos_View.hpp | 178 +- 
lib/kokkos/core/src/Makefile | 20 +- ..._OpenMPexec.cpp => Kokkos_OpenMP_Exec.cpp} | 67 +- ..._OpenMPexec.hpp => Kokkos_OpenMP_Exec.hpp} | 15 +- .../src/OpenMP/Kokkos_OpenMP_Parallel.hpp | 98 +- .../core/src/OpenMP/Kokkos_OpenMP_Task.cpp | 33 +- .../core/src/OpenMP/Kokkos_OpenMP_Task.hpp | 9 +- .../OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp | 306 + .../OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp | 273 + .../OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp | 727 + .../Kokkos_OpenMPTarget_Parallel.hpp | 767 + .../OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp | 329 + .../OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp | 356 + .../core/src/Qthreads/Kokkos_QthreadsExec.cpp | 14 +- .../core/src/Qthreads/Kokkos_QthreadsExec.hpp | 5 + .../src/Qthreads/Kokkos_Qthreads_Parallel.hpp | 5 + .../src/Qthreads/Kokkos_Qthreads_Task.cpp | 8 +- .../src/Qthreads/Kokkos_Qthreads_Task.hpp | 3 +- .../Kokkos_Qthreads_TaskPolicy.cpp.old | 21 +- .../Kokkos_Qthreads_TaskPolicy.hpp.old | 16 +- .../Qthreads/Kokkos_Qthreads_TaskQueue.hpp | 10 +- .../Kokkos_Qthreads_TaskQueue_impl.hpp | 7 +- .../core/src/Threads/Kokkos_ThreadsExec.cpp | 18 +- .../core/src/Threads/Kokkos_ThreadsExec.hpp | 7 +- .../src/Threads/Kokkos_ThreadsExec_base.cpp | 22 +- .../core/src/Threads/Kokkos_ThreadsTeam.hpp | 95 +- .../src/Threads/Kokkos_Threads_Parallel.hpp | 32 +- .../src/impl/KokkosExp_Host_IterateTile.hpp | 138 +- .../core/src/impl/Kokkos_AnalyzePolicy.hpp | 1 + .../core/src/impl/Kokkos_Atomic_Assembly.hpp | 11 +- .../Kokkos_Atomic_Compare_Exchange_Strong.hpp | 1 + .../core/src/impl/Kokkos_Atomic_Decrement.hpp | 2 + .../core/src/impl/Kokkos_Atomic_Exchange.hpp | 3 +- .../core/src/impl/Kokkos_Atomic_Fetch_Add.hpp | 3 + .../core/src/impl/Kokkos_Atomic_Fetch_And.hpp | 10 +- .../core/src/impl/Kokkos_Atomic_Fetch_Or.hpp | 10 +- .../core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp | 10 +- .../core/src/impl/Kokkos_Atomic_Generic.hpp | 14 +- .../core/src/impl/Kokkos_Atomic_Increment.hpp | 2 + .../core/src/impl/Kokkos_Atomic_View.hpp | 9 +- 
.../core/src/impl/Kokkos_Atomic_Windows.hpp | 9 +- lib/kokkos/core/src/impl/Kokkos_BitOps.hpp | 37 +- .../core/src/impl/Kokkos_CPUDiscovery.cpp | 1 + lib/kokkos/core/src/impl/Kokkos_ClockTic.hpp | 106 + .../core/src/impl/Kokkos_ConcurrentBitset.hpp | 357 + lib/kokkos/core/src/impl/Kokkos_Core.cpp | 177 +- lib/kokkos/core/src/impl/Kokkos_Error.cpp | 17 +- lib/kokkos/core/src/impl/Kokkos_Error.hpp | 4 +- .../core/src/impl/Kokkos_ExecPolicy.cpp | 44 + .../core/src/impl/Kokkos_FunctorAdapter.hpp | 55 + .../core/src/impl/Kokkos_FunctorAnalysis.hpp | 350 +- lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp | 10 +- lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp | 47 +- .../core/src/impl/Kokkos_HostThreadTeam.hpp | 75 +- .../core/src/impl/Kokkos_Memory_Fence.hpp | 2 +- lib/kokkos/core/src/impl/Kokkos_OldMacros.hpp | 12 + .../core/src/impl/Kokkos_PhysicalLayout.hpp | 13 +- .../src/impl/Kokkos_Profiling_DeviceInfo.hpp | 8 +- .../src/impl/Kokkos_Profiling_Interface.cpp | 374 +- .../src/impl/Kokkos_Profiling_Interface.hpp | 138 +- lib/kokkos/core/src/impl/Kokkos_Reducer.hpp | 317 - lib/kokkos/core/src/impl/Kokkos_Serial.cpp | 11 +- .../core/src/impl/Kokkos_Serial_Task.cpp | 24 +- .../core/src/impl/Kokkos_Serial_Task.hpp | 1 + .../core/src/impl/Kokkos_SharedAlloc.cpp | 29 +- .../core/src/impl/Kokkos_SharedAlloc.hpp | 21 +- .../core/src/impl/Kokkos_StaticAssert.hpp | 9 +- lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp | 14 +- .../core/src/impl/Kokkos_TaskQueue_impl.hpp | 13 +- lib/kokkos/core/src/impl/Kokkos_Traits.hpp | 16 +- lib/kokkos/core/src/impl/Kokkos_Utilities.hpp | 3 +- .../core/src/impl/Kokkos_ViewMapping.hpp | 33 +- .../core/src/impl/Kokkos_Volatile_Load.hpp | 2 - lib/kokkos/core/src/impl/Kokkos_hwloc.cpp | 1 - lib/kokkos/core/src/impl/Kokkos_spinwait.cpp | 8 +- lib/kokkos/core/unit_test/CMakeLists.txt | 101 +- lib/kokkos/core/unit_test/Makefile | 182 +- lib/kokkos/core/unit_test/TestAggregate.hpp | 5 + lib/kokkos/core/unit_test/TestAtomic.hpp | 49 + 
.../core/unit_test/TestAtomicOperations.hpp | 83 +- lib/kokkos/core/unit_test/TestAtomicViews.hpp | 36 + lib/kokkos/core/unit_test/TestCXX11.hpp | 14 + .../core/unit_test/TestCXX11Deduction.hpp | 7 + .../core/unit_test/TestCompilerMacros.hpp | 7 + lib/kokkos/core/unit_test/TestComplex.hpp | 243 + .../core/unit_test/TestConcurrentBitset.hpp | 177 + .../unit_test/TestDefaultDeviceType_d.cpp | 237 - .../core/unit_test/TestFunctorAnalysis.hpp | 153 + lib/kokkos/core/unit_test/TestInit.hpp | 76 + lib/kokkos/core/unit_test/TestMDRange.hpp | 17 +- lib/kokkos/core/unit_test/TestMemoryPool.hpp | 975 +- .../core/unit_test/TestPolicyConstruction.hpp | 9 + lib/kokkos/core/unit_test/TestRange.hpp | 116 +- lib/kokkos/core/unit_test/TestReduce.hpp | 1002 +- .../unit_test/TestReduceCombinatorical.hpp | 597 + lib/kokkos/core/unit_test/TestScan.hpp | 28 +- .../core/unit_test/TestTaskScheduler.hpp | 146 +- lib/kokkos/core/unit_test/TestTeam.hpp | 12 +- lib/kokkos/core/unit_test/TestTeamVector.hpp | 187 +- .../unit_test/TestTemplateMetaFunctions.hpp | 8 + lib/kokkos/core/unit_test/TestTile.hpp | 27 + lib/kokkos/core/unit_test/TestViewAPI.hpp | 140 +- ...tViewMapping.hpp => TestViewMapping_a.hpp} | 380 +- .../core/unit_test/TestViewMapping_b.hpp | 186 + .../unit_test/TestViewMapping_subview.hpp | 211 + lib/kokkos/core/unit_test/TestViewOfClass.hpp | 5 + lib/kokkos/core/unit_test/TestViewSubview.hpp | 9 +- .../core/unit_test/UnitTestMainInit.cpp | 54 + ..._a.cpp => TestCudaHostPinned_Category.hpp} | 27 +- .../cuda/TestCudaHostPinned_SharedAlloc.cpp | 55 + .../cuda/TestCudaHostPinned_ViewAPI.cpp | 45 + .../cuda/TestCudaHostPinned_ViewMapping_a.cpp | 46 + .../cuda/TestCudaHostPinned_ViewMapping_b.cpp | 46 + ...TestCudaHostPinned_ViewMapping_subview.cpp | 46 + ...uctions_a.cpp => TestCudaUVM_Category.hpp} | 25 +- .../cuda/TestCudaUVM_SharedAlloc.cpp | 55 + .../unit_test/cuda/TestCudaUVM_ViewAPI.cpp | 45 + .../cuda/TestCudaUVM_ViewMapping_a.cpp | 46 + .../cuda/TestCudaUVM_ViewMapping_b.cpp 
| 46 + .../cuda/TestCudaUVM_ViewMapping_subview.cpp | 46 + .../cuda/TestCuda_AtomicOperations.cpp | 46 + .../unit_test/cuda/TestCuda_AtomicViews.cpp | 47 + .../core/unit_test/cuda/TestCuda_Atomics.cpp | 161 +- .../core/unit_test/cuda/TestCuda_Category.hpp | 65 + .../core/unit_test/cuda/TestCuda_Complex.cpp | 47 + ...stCuda_ViewAPI_g.cpp => TestCuda_Init.cpp} | 13 +- .../core/unit_test/cuda/TestCuda_MDRange.cpp | 47 + .../core/unit_test/cuda/TestCuda_Other.cpp | 158 +- .../unit_test/cuda/TestCuda_RangePolicy.cpp | 47 + .../unit_test/cuda/TestCuda_Reductions.cpp | 48 + .../unit_test/cuda/TestCuda_Reductions_b.cpp | 138 - .../core/unit_test/cuda/TestCuda_Scan.cpp | 47 + .../unit_test/cuda/TestCuda_SharedAlloc.cpp | 55 + .../core/unit_test/cuda/TestCuda_Spaces.cpp | 3 +- .../unit_test/cuda/TestCuda_SubView_a.cpp | 47 +- .../unit_test/cuda/TestCuda_SubView_b.cpp | 19 +- .../unit_test/cuda/TestCuda_SubView_c01.cpp | 7 +- .../unit_test/cuda/TestCuda_SubView_c02.cpp | 7 +- .../unit_test/cuda/TestCuda_SubView_c03.cpp | 7 +- .../unit_test/cuda/TestCuda_SubView_c04.cpp | 7 +- .../unit_test/cuda/TestCuda_SubView_c05.cpp | 7 +- .../unit_test/cuda/TestCuda_SubView_c06.cpp | 7 +- .../unit_test/cuda/TestCuda_SubView_c07.cpp | 7 +- .../unit_test/cuda/TestCuda_SubView_c08.cpp | 7 +- .../unit_test/cuda/TestCuda_SubView_c09.cpp | 7 +- .../unit_test/cuda/TestCuda_SubView_c10.cpp | 7 +- .../unit_test/cuda/TestCuda_SubView_c11.cpp | 7 +- .../unit_test/cuda/TestCuda_SubView_c12.cpp | 7 +- .../core/unit_test/cuda/TestCuda_Task.cpp | 47 + .../core/unit_test/cuda/TestCuda_Team.cpp | 89 +- .../cuda/TestCuda_TeamReductionScan.cpp | 82 + .../TestCuda_TeamScratch.cpp} | 80 +- .../unit_test/cuda/TestCuda_ViewAPI_b.cpp | 13 +- .../unit_test/cuda/TestCuda_ViewMapping_a.cpp | 46 + .../unit_test/cuda/TestCuda_ViewMapping_b.cpp | 46 + .../cuda/TestCuda_ViewMapping_subview.cpp | 46 + .../unit_test/cuda/TestCuda_ViewOfClass.cpp | 46 + .../{ => default}/TestDefaultDeviceType.cpp | 31 +- 
.../TestDefaultDeviceTypeInit_1.cpp | 0 .../TestDefaultDeviceTypeInit_10.cpp | 0 .../TestDefaultDeviceTypeInit_11.cpp | 0 .../TestDefaultDeviceTypeInit_12.cpp | 0 .../TestDefaultDeviceTypeInit_13.cpp | 0 .../TestDefaultDeviceTypeInit_14.cpp | 0 .../TestDefaultDeviceTypeInit_15.cpp | 0 .../TestDefaultDeviceTypeInit_16.cpp | 0 .../TestDefaultDeviceTypeInit_2.cpp | 0 .../TestDefaultDeviceTypeInit_3.cpp | 0 .../TestDefaultDeviceTypeInit_4.cpp | 0 .../TestDefaultDeviceTypeInit_5.cpp | 0 .../TestDefaultDeviceTypeInit_6.cpp | 0 .../TestDefaultDeviceTypeInit_7.cpp | 0 .../TestDefaultDeviceTypeInit_8.cpp | 0 .../TestDefaultDeviceTypeInit_9.cpp | 0 .../TestDefaultDeviceType_Category.hpp | 67 + .../{ => default}/TestDefaultDeviceType_a.cpp | 15 +- .../{ => default}/TestDefaultDeviceType_b.cpp | 16 +- .../{ => default}/TestDefaultDeviceType_c.cpp | 16 +- .../default/TestDefaultDeviceType_d.cpp | 73 + .../core/unit_test/openmp/TestOpenMP.hpp | 1 + .../openmp/TestOpenMP_AtomicOperations.cpp | 46 + .../openmp/TestOpenMP_AtomicViews.cpp | 47 + .../unit_test/openmp/TestOpenMP_Atomics.cpp | 159 +- .../unit_test/openmp/TestOpenMP_Category.hpp | 65 + .../unit_test/openmp/TestOpenMP_Complex.cpp | 47 + .../TestOpenMP_Init.cpp} | 13 +- .../unit_test/openmp/TestOpenMP_MDRange.cpp | 47 + .../unit_test/openmp/TestOpenMP_Other.cpp | 176 +- .../openmp/TestOpenMP_RangePolicy.cpp | 47 + .../openmp/TestOpenMP_Reductions.cpp | 105 +- .../core/unit_test/openmp/TestOpenMP_Scan.cpp | 47 + .../TestOpenMP_SharedAlloc.cpp} | 9 +- .../unit_test/openmp/TestOpenMP_SubView_a.cpp | 47 +- .../unit_test/openmp/TestOpenMP_SubView_b.cpp | 19 +- .../openmp/TestOpenMP_SubView_c01.cpp | 7 +- .../openmp/TestOpenMP_SubView_c02.cpp | 7 +- .../openmp/TestOpenMP_SubView_c03.cpp | 7 +- .../openmp/TestOpenMP_SubView_c04.cpp | 7 +- .../openmp/TestOpenMP_SubView_c05.cpp | 5 +- .../openmp/TestOpenMP_SubView_c06.cpp | 7 +- .../openmp/TestOpenMP_SubView_c07.cpp | 7 +- .../openmp/TestOpenMP_SubView_c08.cpp | 7 +- 
.../openmp/TestOpenMP_SubView_c09.cpp | 7 +- .../openmp/TestOpenMP_SubView_c10.cpp | 7 +- .../openmp/TestOpenMP_SubView_c11.cpp | 7 +- .../openmp/TestOpenMP_SubView_c12.cpp | 7 +- .../core/unit_test/openmp/TestOpenMP_Task.cpp | 47 + .../core/unit_test/openmp/TestOpenMP_Team.cpp | 90 +- .../TestOpenMP_TeamReductionScan.cpp} | 85 +- .../openmp/TestOpenMP_TeamScratch.cpp | 83 + .../unit_test/openmp/TestOpenMP_ViewAPI_b.cpp | 83 +- .../openmp/TestOpenMP_ViewMapping_a.cpp | 46 + .../openmp/TestOpenMP_ViewMapping_b.cpp | 46 + .../openmp/TestOpenMP_ViewMapping_subview.cpp | 46 + .../openmp/TestOpenMP_ViewOfClass.cpp | 46 + .../TestOpenMPTarget.hpp} | 90 +- .../TestOpenMPTarget_AtomicOperations.cpp | 46 + .../TestOpenMPTarget_AtomicViews.cpp | 47 + .../openmptarget/TestOpenMPTarget_Atomics.cpp | 46 + .../TestOpenMPTarget_Category.hpp | 65 + .../openmptarget/TestOpenMPTarget_Complex.cpp | 47 + .../TestOpenMPTarget_Init.cpp} | 14 +- .../openmptarget/TestOpenMPTarget_MDRange.cpp | 47 + .../openmptarget/TestOpenMPTarget_Other.cpp | 50 + .../TestOpenMPTarget_RangePolicy.cpp | 47 + .../TestOpenMPTarget_Reductions.cpp | 46 + .../openmptarget/TestOpenMPTarget_Scan.cpp | 47 + .../TestOpenMPTarget_SharedAlloc.cpp | 55 + .../TestOpenMPTarget_SubView_a.cpp | 104 + .../TestOpenMPTarget_SubView_b.cpp} | 22 +- .../TestOpenMPTarget_SubView_c01.cpp | 54 + .../TestOpenMPTarget_SubView_c02.cpp | 54 + .../TestOpenMPTarget_SubView_c03.cpp | 54 + .../TestOpenMPTarget_SubView_c04.cpp | 54 + .../TestOpenMPTarget_SubView_c05.cpp | 54 + .../TestOpenMPTarget_SubView_c06.cpp | 54 + .../TestOpenMPTarget_SubView_c07.cpp} | 8 +- .../TestOpenMPTarget_SubView_c08.cpp | 54 + .../TestOpenMPTarget_SubView_c09.cpp | 54 + .../TestOpenMPTarget_SubView_c10.cpp | 54 + .../TestOpenMPTarget_SubView_c11.cpp | 54 + .../TestOpenMPTarget_SubView_c12.cpp | 54 + .../openmptarget/TestOpenMPTarget_Team.cpp | 75 + .../TestOpenMPTarget_TeamReductionScan.cpp | 81 + .../TestOpenMPTarget_TeamScratch.cpp | 83 + 
.../TestOpenMPTarget_ViewAPI_b.cpp | 45 + .../TestOpenMPTarget_ViewMapping_a.cpp | 46 + .../TestOpenMPTarget_ViewMapping_b.cpp | 46 + .../TestOpenMPTarget_ViewMapping_subview.cpp | 46 + .../TestOpenMPTarget_ViewOfClass.cpp | 46 + .../qthreads/TestQthreads_Category.hpp | 65 + .../qthreads/TestQthreads_Complex.cpp | 3 + .../unit_test/qthreads/TestQthreads_Other.cpp | 8 +- .../serial/TestSerial_AtomicOperations.cpp | 46 + .../serial/TestSerial_AtomicViews.cpp | 47 + .../unit_test/serial/TestSerial_Atomics.cpp | 162 +- .../unit_test/serial/TestSerial_Category.hpp | 65 + .../unit_test/serial/TestSerial_Complex.cpp | 47 + .../core/unit_test/serial/TestSerial_Init.cpp | 50 + .../unit_test/serial/TestSerial_MDRange.cpp | 47 + .../unit_test/serial/TestSerial_Other.cpp | 136 +- .../serial/TestSerial_RangePolicy.cpp | 47 + .../serial/TestSerial_Reductions.cpp | 88 +- .../core/unit_test/serial/TestSerial_Scan.cpp | 47 + .../TestSerial_SharedAlloc.cpp} | 9 +- .../unit_test/serial/TestSerial_SubView_a.cpp | 47 +- .../unit_test/serial/TestSerial_SubView_b.cpp | 19 +- .../serial/TestSerial_SubView_c01.cpp | 7 +- .../serial/TestSerial_SubView_c02.cpp | 7 +- .../serial/TestSerial_SubView_c03.cpp | 7 +- .../serial/TestSerial_SubView_c04.cpp | 7 +- .../serial/TestSerial_SubView_c05.cpp | 5 +- .../serial/TestSerial_SubView_c06.cpp | 7 +- .../serial/TestSerial_SubView_c07.cpp | 7 +- .../serial/TestSerial_SubView_c08.cpp | 7 +- .../serial/TestSerial_SubView_c09.cpp | 7 +- .../serial/TestSerial_SubView_c10.cpp | 7 +- .../serial/TestSerial_SubView_c11.cpp | 7 +- .../serial/TestSerial_SubView_c12.cpp | 7 +- .../core/unit_test/serial/TestSerial_Task.cpp | 47 + .../core/unit_test/serial/TestSerial_Team.cpp | 87 +- .../serial/TestSerial_TeamReductionScan.cpp | 81 + .../serial/TestSerial_TeamScratch.cpp | 83 + .../unit_test/serial/TestSerial_ViewAPI_b.cpp | 83 +- .../serial/TestSerial_ViewMapping_a.cpp | 46 + .../serial/TestSerial_ViewMapping_b.cpp | 46 + 
.../serial/TestSerial_ViewMapping_subview.cpp | 46 + .../serial/TestSerial_ViewOfClass.cpp | 46 + .../core/unit_test/threads/TestThreads.hpp | 40 +- .../threads/TestThreads_AtomicOperations.cpp | 46 + .../threads/TestThreads_AtomicViews.cpp | 47 + .../unit_test/threads/TestThreads_Atomics.cpp | 158 +- .../threads/TestThreads_Category.hpp | 65 + .../unit_test/threads/TestThreads_Complex.cpp | 47 + .../unit_test/threads/TestThreads_Init.cpp | 50 + .../unit_test/threads/TestThreads_MDRange.cpp | 47 + .../unit_test/threads/TestThreads_Other.cpp | 160 +- .../threads/TestThreads_RangePolicy.cpp | 47 + .../threads/TestThreads_Reductions.cpp | 106 +- .../unit_test/threads/TestThreads_Scan.cpp | 47 + .../threads/TestThreads_SharedAlloc.cpp | 55 + .../threads/TestThreads_SubView_a.cpp | 47 +- .../threads/TestThreads_SubView_b.cpp | 19 +- .../threads/TestThreads_SubView_c01.cpp | 7 +- .../threads/TestThreads_SubView_c02.cpp | 7 +- .../threads/TestThreads_SubView_c03.cpp | 7 +- .../threads/TestThreads_SubView_c04.cpp | 7 +- .../threads/TestThreads_SubView_c05.cpp | 5 +- .../threads/TestThreads_SubView_c06.cpp | 7 +- .../threads/TestThreads_SubView_c07.cpp | 7 +- .../threads/TestThreads_SubView_c08.cpp | 7 +- .../threads/TestThreads_SubView_c09.cpp | 7 +- .../threads/TestThreads_SubView_c10.cpp | 7 +- .../threads/TestThreads_SubView_c11.cpp | 7 +- .../threads/TestThreads_SubView_c12.cpp | 7 +- .../unit_test/threads/TestThreads_Team.cpp | 90 +- .../threads/TestThreads_TeamReductionScan.cpp | 81 + .../threads/TestThreads_TeamScratch.cpp | 83 + .../threads/TestThreads_ViewAPI_a.cpp | 54 - .../threads/TestThreads_ViewAPI_b.cpp | 83 +- .../threads/TestThreads_ViewMapping_a.cpp | 46 + .../threads/TestThreads_ViewMapping_b.cpp | 46 + .../TestThreads_ViewMapping_subview.cpp | 46 + .../threads/TestThreads_ViewOfClass.cpp | 46 + lib/kokkos/example/cmake_build/CMakeLists.txt | 44 + .../example/cmake_build/cmake_example.cpp | 87 + lib/kokkos/example/feint/ElemFunctor.hpp | 10 +- 
lib/kokkos/example/feint/feint_threads.cpp | 13 +- lib/kokkos/example/feint/main.cpp | 10 +- lib/kokkos/example/fenl/CGSolve.hpp | 4 +- lib/kokkos/example/fenl/fenl.cpp | 2 +- lib/kokkos/example/fenl/fenl.hpp | 10 +- lib/kokkos/example/fenl/fenl_functors.hpp | 14 +- lib/kokkos/example/fenl/fenl_impl.hpp | 16 +- lib/kokkos/example/fenl/main.cpp | 19 +- lib/kokkos/example/fixture/BoxElemFixture.hpp | 16 +- .../example/global_2_local_ids/G2L_Main.cpp | 12 +- lib/kokkos/example/grow_array/grow_array.hpp | 10 +- lib/kokkos/example/grow_array/main.cpp | 10 +- lib/kokkos/example/md_skeleton/main.cpp | 10 +- .../example/multi_fem/ExplicitFunctors.hpp | 18 +- .../multi_fem/HexExplicitFunctions.hpp | 10 +- lib/kokkos/example/multi_fem/Nonlinear.hpp | 4 +- .../multi_fem/NonlinearElement_Cuda.hpp | 22 +- .../example/multi_fem/ParallelMachine.cpp | 14 +- .../example/multi_fem/SparseLinearSystem.hpp | 4 +- .../example/multi_fem/TestBoxMeshFixture.hpp | 10 +- lib/kokkos/example/sort_array/main.cpp | 16 +- lib/kokkos/example/sort_array/sort_array.hpp | 10 +- .../05_simple_atomics/simple_atomics.cpp | 2 +- lib/kokkos/generate_makefile.bash | 19 +- lib/kokkos/tpls/gtest/gtest/LICENSE | 28 + lib/kokkos/tpls/gtest/gtest/README | 13 + lib/kokkos/tpls/gtest/gtest/gtest-all.cc | 9594 ++++++++ lib/kokkos/tpls/gtest/gtest/gtest-test-part.h | 1 + lib/kokkos/tpls/gtest/gtest/gtest.h | 20065 ++++++++++++++++ 474 files changed, 50972 insertions(+), 10897 deletions(-) create mode 100644 lib/kokkos/cmake/KokkosConfig.cmake.in create mode 100644 lib/kokkos/cmake/Modules/FindHWLOC.cmake create mode 100644 lib/kokkos/cmake/Modules/FindMemkind.cmake create mode 100644 lib/kokkos/cmake/Modules/FindQthreads.cmake create mode 100644 lib/kokkos/cmake/kokkos.cmake create mode 100644 lib/kokkos/config/trilinos-integration/checkin-test create mode 100755 lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_pthread_intel create mode 100755 
lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_serial_intel create mode 100755 lib/kokkos/config/trilinos-integration/white_run_jenkins_script_cuda create mode 100755 lib/kokkos/config/trilinos-integration/white_run_jenkins_script_omp delete mode 100644 lib/kokkos/containers/unit_tests/TestComplex.hpp delete mode 100644 lib/kokkos/core/perf_test/PerfTestCuda.cpp rename lib/kokkos/core/perf_test/{PerfTestGramSchmidt.hpp => PerfTestGramSchmidt.cpp} (78%) rename lib/kokkos/core/perf_test/{PerfTestHexGrad.hpp => PerfTestHexGrad.cpp} (83%) delete mode 100644 lib/kokkos/core/perf_test/PerfTestHost.cpp rename lib/kokkos/core/{unit_test/cuda/TestCuda_ViewAPI_f.cpp => perf_test/PerfTest_Category.hpp} (82%) create mode 100644 lib/kokkos/core/perf_test/PerfTest_CustomReduction.cpp create mode 100755 lib/kokkos/core/perf_test/run_mempool.sh create mode 100755 lib/kokkos/core/perf_test/run_mempool_fill.sh create mode 100755 lib/kokkos/core/perf_test/run_taskdag.sh create mode 100644 lib/kokkos/core/perf_test/test_mempool.cpp create mode 100644 lib/kokkos/core/perf_test/test_taskdag.cpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp create mode 100644 lib/kokkos/core/src/Kokkos_NumericTraits.hpp create mode 100644 lib/kokkos/core/src/Kokkos_OpenMPTarget.hpp create mode 100644 lib/kokkos/core/src/Kokkos_OpenMPTargetSpace.hpp rename lib/kokkos/core/src/OpenMP/{Kokkos_OpenMPexec.cpp => Kokkos_OpenMP_Exec.cpp} (87%) rename lib/kokkos/core/src/OpenMP/{Kokkos_OpenMPexec.hpp => Kokkos_OpenMP_Exec.hpp} (97%) create mode 100644 lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp create mode 100644 lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp create mode 100644 lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp create mode 100644 lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Parallel.hpp create mode 100644 lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp create mode 100644 
lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_ClockTic.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_ConcurrentBitset.hpp delete mode 100644 lib/kokkos/core/src/impl/Kokkos_Reducer.hpp create mode 100644 lib/kokkos/core/unit_test/TestComplex.hpp create mode 100644 lib/kokkos/core/unit_test/TestConcurrentBitset.hpp delete mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceType_d.cpp create mode 100644 lib/kokkos/core/unit_test/TestFunctorAnalysis.hpp create mode 100644 lib/kokkos/core/unit_test/TestInit.hpp create mode 100644 lib/kokkos/core/unit_test/TestReduceCombinatorical.hpp rename lib/kokkos/core/unit_test/{TestViewMapping.hpp => TestViewMapping_a.hpp} (77%) create mode 100644 lib/kokkos/core/unit_test/TestViewMapping_b.hpp create mode 100644 lib/kokkos/core/unit_test/TestViewMapping_subview.hpp create mode 100644 lib/kokkos/core/unit_test/UnitTestMainInit.cpp rename lib/kokkos/core/unit_test/cuda/{TestCuda_ViewAPI_a.cpp => TestCudaHostPinned_Category.hpp} (86%) create mode 100644 lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_SharedAlloc.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewAPI.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_a.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_b.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCudaHostPinned_ViewMapping_subview.cpp rename lib/kokkos/core/unit_test/cuda/{TestCuda_Reductions_a.cpp => TestCudaUVM_Category.hpp} (86%) create mode 100644 lib/kokkos/core/unit_test/cuda/TestCudaUVM_SharedAlloc.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewAPI.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_a.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_b.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCudaUVM_ViewMapping_subview.cpp create 
mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_AtomicOperations.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_AtomicViews.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_Category.hpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_Complex.cpp rename lib/kokkos/core/unit_test/cuda/{TestCuda_ViewAPI_g.cpp => TestCuda_Init.cpp} (93%) create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_MDRange.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_RangePolicy.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_Reductions.cpp delete mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_Reductions_b.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_Scan.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_SharedAlloc.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_Task.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_TeamReductionScan.cpp rename lib/kokkos/core/unit_test/{serial/TestSerial.hpp => cuda/TestCuda_TeamScratch.cpp} (64%) create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_ViewMapping_a.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_ViewMapping_b.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_ViewMapping_subview.cpp create mode 100644 lib/kokkos/core/unit_test/cuda/TestCuda_ViewOfClass.cpp rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceType.cpp (81%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_1.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_10.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_11.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_12.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_13.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_14.cpp (100%) rename 
lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_15.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_16.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_2.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_3.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_4.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_5.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_6.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_7.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_8.cpp (100%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceTypeInit_9.cpp (100%) create mode 100644 lib/kokkos/core/unit_test/default/TestDefaultDeviceType_Category.hpp rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceType_a.cpp (91%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceType_b.cpp (90%) rename lib/kokkos/core/unit_test/{ => default}/TestDefaultDeviceType_c.cpp (90%) create mode 100644 lib/kokkos/core/unit_test/default/TestDefaultDeviceType_d.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_AtomicOperations.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_AtomicViews.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_Category.hpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_Complex.cpp rename lib/kokkos/core/unit_test/{cuda/TestCuda_ViewAPI_h.cpp => openmp/TestOpenMP_Init.cpp} (92%) create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_MDRange.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_RangePolicy.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_Scan.cpp rename lib/kokkos/core/unit_test/{cuda/TestCuda_ViewAPI_c.cpp => 
openmp/TestOpenMP_SharedAlloc.cpp} (91%) create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_Task.cpp rename lib/kokkos/core/unit_test/{cuda/TestCuda_ViewAPI_d.cpp => openmp/TestOpenMP_TeamReductionScan.cpp} (55%) create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_TeamScratch.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewMapping_a.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewMapping_b.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewMapping_subview.cpp create mode 100644 lib/kokkos/core/unit_test/openmp/TestOpenMP_ViewOfClass.cpp rename lib/kokkos/core/unit_test/{cuda/TestCuda.hpp => openmptarget/TestOpenMPTarget.hpp} (60%) create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_AtomicOperations.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_AtomicViews.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Atomics.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Category.hpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Complex.cpp rename lib/kokkos/core/unit_test/{serial/TestSerial_ViewAPI_a.cpp => openmptarget/TestOpenMPTarget_Init.cpp} (90%) create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_MDRange.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Other.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_RangePolicy.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Reductions.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Scan.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SharedAlloc.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_a.cpp rename lib/kokkos/core/unit_test/{cuda/TestCuda_ViewAPI_e.cpp => 
openmptarget/TestOpenMPTarget_SubView_b.cpp} (71%) create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c01.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c02.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c03.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c04.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c05.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c06.cpp rename lib/kokkos/core/unit_test/{cuda/TestCuda_ViewAPI_s.cpp => openmptarget/TestOpenMPTarget_SubView_c07.cpp} (90%) create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c08.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c09.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c10.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c11.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_SubView_c12.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_Team.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_TeamReductionScan.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_TeamScratch.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewAPI_b.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewMapping_a.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewMapping_b.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewMapping_subview.cpp create mode 100644 lib/kokkos/core/unit_test/openmptarget/TestOpenMPTarget_ViewOfClass.cpp create mode 100644 lib/kokkos/core/unit_test/qthreads/TestQthreads_Category.hpp create mode 
100644 lib/kokkos/core/unit_test/qthreads/TestQthreads_Complex.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_AtomicOperations.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_AtomicViews.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_Category.hpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_Complex.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_Init.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_MDRange.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_RangePolicy.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_Scan.cpp rename lib/kokkos/core/unit_test/{openmp/TestOpenMP_ViewAPI_a.cpp => serial/TestSerial_SharedAlloc.cpp} (91%) create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_Task.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_TeamReductionScan.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_TeamScratch.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_ViewMapping_a.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_ViewMapping_b.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_ViewMapping_subview.cpp create mode 100644 lib/kokkos/core/unit_test/serial/TestSerial_ViewOfClass.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_AtomicOperations.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_AtomicViews.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_Category.hpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_Complex.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_Init.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_MDRange.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_RangePolicy.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_Scan.cpp create mode 100644 
lib/kokkos/core/unit_test/threads/TestThreads_SharedAlloc.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_TeamReductionScan.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_TeamScratch.cpp delete mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_ViewAPI_a.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_ViewMapping_a.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_ViewMapping_b.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_ViewMapping_subview.cpp create mode 100644 lib/kokkos/core/unit_test/threads/TestThreads_ViewOfClass.cpp create mode 100644 lib/kokkos/example/cmake_build/CMakeLists.txt create mode 100644 lib/kokkos/example/cmake_build/cmake_example.cpp create mode 100644 lib/kokkos/tpls/gtest/gtest/LICENSE create mode 100644 lib/kokkos/tpls/gtest/gtest/README create mode 100644 lib/kokkos/tpls/gtest/gtest/gtest-all.cc create mode 120000 lib/kokkos/tpls/gtest/gtest/gtest-test-part.h create mode 100644 lib/kokkos/tpls/gtest/gtest/gtest.h diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index c6fe991b97..acb54ff22f 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,53 @@ # Change Log + +## [2.03.05](https://github.com/kokkos/kokkos/tree/2.03.05) (2017-05-27) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.00...2.03.05) + +**Implemented enhancements:** + +- Harmonize Custom Reductions over nesting levels [\#802](https://github.com/kokkos/kokkos/issues/802) +- Prevent users directly including KokkosCore\_config.h [\#815](https://github.com/kokkos/kokkos/issues/815) +- DualView aborts on concurrent host/device modify \(in debug mode\) [\#814](https://github.com/kokkos/kokkos/issues/814) +- Abort when running on a NVIDIA CC5.0 or higher architecture with code compiled for CC \< 5.0 [\#813](https://github.com/kokkos/kokkos/issues/813) +- Add "name" function to ExecSpaces 
[\#806](https://github.com/kokkos/kokkos/issues/806) +- Allow null Future in task spawn dependences [\#795](https://github.com/kokkos/kokkos/issues/795) +- Add Unit Tests for Kokkos::complex [\#785](https://github.com/kokkos/kokkos/issues/785) +- Add pow function for Kokkos::complex [\#784](https://github.com/kokkos/kokkos/issues/784) +- Square root of a complex [\#729](https://github.com/kokkos/kokkos/issues/729) +- Command line processing of --threads argument prevents users from having any commandline arguments starting with --threads [\#760](https://github.com/kokkos/kokkos/issues/760) +- Protected deprecated API with appropriate macro [\#756](https://github.com/kokkos/kokkos/issues/756) +- Allow task scheduler memory pool to be used by tasks [\#747](https://github.com/kokkos/kokkos/issues/747) +- View bounds checking on host-side performance: constructing a std::string [\#723](https://github.com/kokkos/kokkos/issues/723) +- Add check for AppleClang as compiler distinct from check for Clang. [\#705](https://github.com/kokkos/kokkos/issues/705) +- Uninclude source files for specific configurations to prevent link warning. [\#701](https://github.com/kokkos/kokkos/issues/701) +- Add --small option to snapshot script [\#697](https://github.com/kokkos/kokkos/issues/697) +- CMake Standalone Support [\#674](https://github.com/kokkos/kokkos/issues/674) +- CMake build unit test and install [\#808](https://github.com/kokkos/kokkos/issues/808) +- CMake: Fix having kokkos as a subdirectory in a pure cmake project [\#629](https://github.com/kokkos/kokkos/issues/629) +- Tribits macro assumes build directory is in top level source directory [\#654](https://github.com/kokkos/kokkos/issues/654) +- Use bin/nvcc\_wrapper, not config/nvcc\_wrapper [\#562](https://github.com/kokkos/kokkos/issues/562) +- Allow MemoryPool::allocate\(\) to be called from multiple threads per warp. 
[\#487](https://github.com/kokkos/kokkos/issues/487) +- Allow MemoryPool::allocate\\(\\) to be called from multiple threads per warp. [\#487](https://github.com/kokkos/kokkos/issues/487) +- Move OpenMP 4.5 OpenMPTarget backend into Develop [\#456](https://github.com/kokkos/kokkos/issues/456) +- Testing on ARM testbed [\#288](https://github.com/kokkos/kokkos/issues/288) + +**Fixed bugs:** + +- Fix label in OpenMP parallel\_reduce verify\_initialized [\#834](https://github.com/kokkos/kokkos/issues/834) +- TeamScratch Level 1 on Cuda hangs [\#820](https://github.com/kokkos/kokkos/issues/820) +- \[bug\] memory pool. [\#786](https://github.com/kokkos/kokkos/issues/786) +- Some Reduction Tests fail on Intel 18 with aggressive vectorization on [\#774](https://github.com/kokkos/kokkos/issues/774) +- Error copying dynamic view on copy of memory pool [\#773](https://github.com/kokkos/kokkos/issues/773) +- CUDA stack overflow with TaskDAG test [\#758](https://github.com/kokkos/kokkos/issues/758) +- ThreadVectorRange Customized Reduction Bug [\#739](https://github.com/kokkos/kokkos/issues/739) +- set\_scratch\_size overflows [\#726](https://github.com/kokkos/kokkos/issues/726) +- Get wrong results for compiler checks in Makefile on OS X. [\#706](https://github.com/kokkos/kokkos/issues/706) +- Fix check if multiple host architectures enabled. 
[\#702](https://github.com/kokkos/kokkos/issues/702) +- Threads Backend Does not Pass on Cray Compilers [\#609](https://github.com/kokkos/kokkos/issues/609) +- Rare bug in memory pool where allocation can finish on superblock in empty state [\#452](https://github.com/kokkos/kokkos/issues/452) +- LDFLAGS in core/unit\_test/Makefile: potential "undefined reference" to pthread lib [\#148](https://github.com/kokkos/kokkos/issues/148) + ## [2.03.00](https://github.com/kokkos/kokkos/tree/2.03.00) (2017-04-25) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.15...2.03.00) diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt index 1c820660ae..b2771ed527 100644 --- a/lib/kokkos/CMakeLists.txt +++ b/lib/kokkos/CMakeLists.txt @@ -5,11 +5,12 @@ ELSE() ENDIF() IF(NOT KOKKOS_HAS_TRILINOS) - CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11 FATAL_ERROR) - INCLUDE(cmake/tribits.cmake) - SET(CMAKE_CXX_STANDARD 11) -ENDIF() + cmake_minimum_required(VERSION 3.1 FATAL_ERROR) + project(Kokkos CXX) + INCLUDE(cmake/kokkos.cmake) +ELSE() +#------------------------------------------------------------------------------ # # A) Forward delcare the package so that certain options are also defined for # subpackages @@ -17,14 +18,13 @@ ENDIF() TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS) + #------------------------------------------------------------------------------ # # B) Define the common options for Kokkos first so they can be used by # subpackages as well. # - - # mfh 01 Aug 2016: See Issue #61: # # https://github.com/kokkos/kokkos/issues/61 @@ -83,10 +83,10 @@ TRIBITS_ADD_OPTION_AND_DEFINE( ) ASSERT_DEFINED(TPL_ENABLE_Pthread) -IF (Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread) +IF(Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread) MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF). This is not allowed. 
Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.") -ENDIF () -IF (NOT TPL_ENABLE_Pthread) +ENDIF() +IF(NOT TPL_ENABLE_Pthread) ADD_DEFINITIONS(-DGTEST_HAS_PTHREAD=0) ENDIF() @@ -98,12 +98,13 @@ TRIBITS_ADD_OPTION_AND_DEFINE( ) TRIBITS_ADD_OPTION_AND_DEFINE( - Kokkos_ENABLE_Qthreads + Kokkos_ENABLE_QTHREAD KOKKOS_HAVE_QTHREADS "Enable Qthreads support in Kokkos." - "${TPL_ENABLE_QTHREADS}" + "${TPL_ENABLE_QTHREAD}" ) +# TODO: No longer an option in Kokkos. Needs to be removed. TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_CXX11 KOKKOS_HAVE_CXX11 @@ -118,6 +119,7 @@ TRIBITS_ADD_OPTION_AND_DEFINE( "${TPL_ENABLE_HWLOC}" ) +# TODO: This is currently not used in Kokkos. Should it be removed? TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_MPI KOKKOS_HAVE_MPI @@ -154,13 +156,27 @@ TRIBITS_ADD_OPTION_AND_DEFINE( "${Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT}" ) +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Debug_DualView_Modify_Check + KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK + "Enable abort when Kokkos::DualView modified on host and device without sync." + "${Kokkos_ENABLE_DEBUG}" + ) + TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_Profiling - KOKKOS_ENABLE_PROFILING_INTERNAL + KOKKOS_ENABLE_PROFILING "Enable KokkosP profiling support for kernel data collections." "${TPL_ENABLE_DLlib}" ) +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Profiling_Load_Print + KOKKOS_ENABLE_PROFILING_LOAD_PRINT + "Print to standard output which profiling library was loaded." + OFF + ) + # placeholder for future device... TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_ENABLE_Winthread @@ -169,6 +185,7 @@ TRIBITS_ADD_OPTION_AND_DEFINE( "${TPL_ENABLE_Winthread}" ) +# TODO: No longer an option in Kokkos. Needs to be removed. 
# use new/old View TRIBITS_ADD_OPTION_AND_DEFINE( Kokkos_USING_DEPRECATED_VIEW @@ -177,12 +194,12 @@ TRIBITS_ADD_OPTION_AND_DEFINE( OFF ) + #------------------------------------------------------------------------------ # # C) Install Kokkos' executable scripts # - # nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler. # Kokkos needs nvcc_wrapper in order to build. Other libraries and # executables also need nvcc_wrapper. Thus, we need to install it. @@ -199,6 +216,8 @@ INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION bin) TRIBITS_PROCESS_SUBPACKAGES() + +#------------------------------------------------------------------------------ # # E) If Kokkos itself is enabled, process the Kokkos package # @@ -213,3 +232,4 @@ TRIBITS_EXCLUDE_FILES( ) TRIBITS_PACKAGE_POSTPROCESS() +ENDIF() diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 5b094dba8c..24cd772e00 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -35,23 +35,26 @@ KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "e # Check for advanced settings. 
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l)) KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l)) +KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_dualview_modify_check" | wc -l)) +KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "enable_profile_load_print" | wc -l)) KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l)) KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l)) KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l)) KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l)) # Check for Kokkos Host Execution Spaces one of which must be on. -KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l)) +KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l)) +KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(subst OpenMPTarget,,$(KOKKOS_DEVICES)) | grep OpenMP | wc -l)) KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l)) KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l)) KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l)) ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) -ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) -ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0) - KOKKOS_INTERNAL_USE_SERIAL := 1 -endif -endif + ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) + ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0) + KOKKOS_INTERNAL_USE_SERIAL := 1 + endif + endif endif # Check for other Execution Spaces. 
@@ -64,24 +67,25 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) endif # Check OS. -KOKKOS_OS := $(shell uname -s) -KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname -s | grep CYGWIN | wc -l) -KOKKOS_INTERNAL_OS_LINUX := $(shell uname -s | grep Linux | wc -l) -KOKKOS_INTERNAL_OS_DARWIN := $(shell uname -s | grep Darwin | wc -l) +KOKKOS_OS := $(strip $(shell uname -s)) +KOKKOS_INTERNAL_OS_CYGWIN := $(strip $(shell uname -s | grep CYGWIN | wc -l)) +KOKKOS_INTERNAL_OS_LINUX := $(strip $(shell uname -s | grep Linux | wc -l)) +KOKKOS_INTERNAL_OS_DARWIN := $(strip $(shell uname -s | grep Darwin | wc -l)) # Check compiler. -KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l) -KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l) -KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l) -KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l) -KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(CXX) --version 2>&1 | grep "nvcc" | wc -l) +KOKKOS_INTERNAL_COMPILER_INTEL := $(strip $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l)) +KOKKOS_INTERNAL_COMPILER_PGI := $(strip $(shell $(CXX) --version 2>&1 | grep PGI | wc -l)) +KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)) +KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)) +KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(CXX) --version 2>&1 | grep nvcc | wc -l)) +KOKKOS_INTERNAL_COMPILER_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep clang | wc -l)) +KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep "apple-darwin" | wc -l)) ifneq ($(OMPI_CXX),) - KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(OMPI_CXX) --version 2>&1 | grep "nvcc" | wc -l) + KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(OMPI_CXX) --version 2>&1 | grep "nvcc" | wc -l)) endif ifneq ($(MPICH_CXX),) - 
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(MPICH_CXX) --version 2>&1 | grep "nvcc" | wc -l) + KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(MPICH_CXX) --version 2>&1 | grep "nvcc" | wc -l)) endif -KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version 2>&1 | grep "clang" | wc -l) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2) KOKKOS_INTERNAL_COMPILER_CLANG = 1 @@ -90,6 +94,11 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 2) KOKKOS_INTERNAL_COMPILER_XL = 1 endif +# Apple Clang passes both clang and apple clang tests, so turn off clang. +ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) + KOKKOS_INTERNAL_COMPILER_CLANG = 0 +endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.') @@ -97,29 +106,43 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0) $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher) endif + KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1 endif endif +# Set OpenMP flags. ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) KOKKOS_INTERNAL_OPENMP_FLAG := -mp else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp else - ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) - KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp + ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) + KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp else - ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) - # OpenMP is turned on by default in Cray compiler environment. - KOKKOS_INTERNAL_OPENMP_FLAG := + ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp else - KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + # OpenMP is turned on by default in Cray compiler environment. 
+ KOKKOS_INTERNAL_OPENMP_FLAG := + else + KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp + endif endif endif endif endif +ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_IBM_XL_OMP45_WORKAROUND -qsmp=omp -qoffload -qnoeh +else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp + endif +endif +# Set C++11 flags. ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) KOKKOS_INTERNAL_CXX11_FLAG := --c++11 else @@ -146,7 +169,7 @@ KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l)) # NVIDIA based. -NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper +NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l)) @@ -180,10 +203,20 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1) + ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc) + CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=) + KOKKOS_INTERNAL_OPENMPTARGET_FLAG := $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) --cuda-path=$(CUDA_PATH) + endif + endif +endif # ARM based. 
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l)) KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX) | bc)) # IBM based. KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l)) @@ -206,8 +239,11 @@ KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc )) KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc )) +# Decide whether we can support transactional memory +KOKKOS_INTERNAL_USE_TM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) + # Incompatible flags? 
-KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc )) +KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc )) KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc)) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1) @@ -240,12 +276,22 @@ tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.t tmp := $(shell date >> KokkosCore_config.tmp) tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp) +tmp := $(shell echo '\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)' >> KokkosCore_config.tmp) +tmp := $(shell echo '\#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."' >> KokkosCore_config.tmp) +tmp := $(shell echo '\#else' >> KokkosCore_config.tmp) +tmp := $(shell echo '\#define KOKKOS_CORE_CONFIG_H' >> KokkosCore_config.tmp) +tmp := $(shell echo '\#endif' >> KokkosCore_config.tmp) + tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp ) endif +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + tmp := $(shell echo '\#define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp) +endif + ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp) endif @@ -262,6 +308,12 @@ ifeq 
($(KOKKOS_INTERNAL_USE_SERIAL), 1) tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp ) endif +ifeq ($(KOKKOS_INTERNAL_USE_TM), 1) + tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ENABLE_TM" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp ) +endif + ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1) tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp ) @@ -293,13 +345,21 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1) endif ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) -ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_CXXFLAGS += -lineinfo -endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_CXXFLAGS += -lineinfo + endif + KOKKOS_CXXFLAGS += -g KOKKOS_LDFLAGS += -g -ldl tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp ) tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0) + tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK 1" >> KokkosCore_config.tmp ) + endif +endif + +ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1) + tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT 1" >> KokkosCore_config.tmp ) endif ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) @@ -311,8 +371,6 @@ endif ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp ) KOKKOS_LIBS += -lrt endif @@ -323,8 +381,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp ) endif -ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1) - tmp := $(shell echo 
"\#define KOKKOS_ENABLE_PROFILING 0" >> KokkosCore_config.tmp ) +ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0) + tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING" >> KokkosCore_config.tmp ) endif tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp) @@ -336,39 +394,44 @@ endif tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) + endif + endif -ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) -endif + ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) + endif -ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp ) -endif + ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += --relocatable-device-code=true + KOKKOS_LDFLAGS += --relocatable-device-code=true + endif -ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += --relocatable-device-code=true - KOKKOS_LDFLAGS += --relocatable-device-code=true -endif + ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += 
-expt-extended-lambda + else + $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.) + endif + endif -ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) - ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0) + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += -expt-extended-lambda - else - $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.) endif endif ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_CUDA_CLANG_WORKAROUND" >> KokkosCore_config.tmp ) endif endif -endif - # Add Architecture flags. ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1) @@ -469,7 +532,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1) endif endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) @@ -491,6 +554,28 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xCORE-AVX2 + KOKKOS_LDFLAGS += -xCORE-AVX2 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += -tp=haswell + KOKKOS_LDFLAGS += -tp=haswell + else + # Assume that this is a really a GNU compiler. 
+ KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm + KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm + endif + endif + endif +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp ) @@ -501,12 +586,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) else - ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) else # Asssume that this is really a GNU compiler. - KOKKOS_CXXFLAGS += -march=knl - KOKKOS_LDFLAGS += -march=knl + KOKKOS_CXXFLAGS += -march=knl -mtune=knl + KOKKOS_LDFLAGS += -march=knl -mtune=knl endif endif endif @@ -526,8 +611,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) else # Nothing here yet. - KOKKOS_CXXFLAGS += -march=skylake-avx512 - KOKKOS_LDFLAGS += -march=skylake-avx512 + KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm + KOKKOS_LDFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm endif endif endif @@ -541,70 +626,67 @@ endif # Figure out the architecture flag for Cuda. 
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch + endif + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch + KOKKOS_CXXFLAGS += -x cuda + endif -ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) - KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-arch -endif -ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) - KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=--cuda-gpu-arch - KOKKOS_CXXFLAGS += -x cuda -endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50 + endif + ifeq 
($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60 + endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) + KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61 + endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) - tmp := $(shell echo "\#define 
KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += 
$(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61 -endif -ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp ) - tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp ) - KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60 - KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60 -endif + ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) + ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1) + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG) + endif + endif endif KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h) @@ -630,9 +712,24 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) - KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include + KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 KOKKOS_LIBS += -lcudart -lcuda + + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH) + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) + KOKKOS_SRC += $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp) + ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) + else + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) + endif + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) @@ -666,10 +763,27 @@ endif ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) KOKKOS_INTERNAL_GCC_PATH = $(shell which g++) KOKKOS_INTERNAL_GCC_TOOLCHAIN = 
$(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=) - KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC + KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) endif +# Don't include Kokkos_HBWSpace.cpp if not using MEMKIND to avoid a link warning. +ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) + KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC)) +endif + +# Don't include Kokkos_Profiling_Interface.cpp if not using profiling to avoid a link warning. +ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1) + KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp,$(KOKKOS_SRC)) +endif + +# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial +# device to avoid a link warning. +ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC)) + KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp,$(KOKKOS_SRC)) +endif + # With Cygwin functions such as fdopen and fileno are not defined # when strict ansi is enabled. strict ansi gets enabled with --std=c++11 # though. So we hard undefine it here. 
Not sure if that has any bad side effects diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index 54cacb741b..3cb52a04cd 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -53,11 +53,20 @@ Kokkos_Qthreads_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/K endif ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) -Kokkos_OpenMPexec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp +Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp endif +ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1) +Kokkos_OpenMPTarget_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp +Kokkos_OpenMPTargetSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp +#Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp +# $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp +endif + Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp diff --git a/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp b/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp index e69de29bb2..9c08a088b0 100644 --- a/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp +++ b/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp @@ -0,0 +1 @@ +void KOKKOS_ALGORITHMS_SRC_DUMMY_PREVENT_LINK_ERROR() {} diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp index bd73582362..42c115b7a5 100644 --- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -674,7 +674,7 @@ namespace Kokkos { const double V = 2.0*drand() - 1.0; S = U*U+V*V; } - return U*sqrt(-2.0*log(S)/S); + return U*std::sqrt(-2.0*log(S)/S); } KOKKOS_INLINE_FUNCTION @@ -917,7 +917,7 @@ namespace Kokkos { const double V = 2.0*drand() - 1.0; S = U*U+V*V; } - return U*sqrt(-2.0*log(S)/S); + return U*std::sqrt(-2.0*log(S)/S); } KOKKOS_INLINE_FUNCTION @@ -1171,7 +1171,7 @@ namespace Kokkos { const double V = 2.0*drand() - 1.0; S = U*U+V*V; } - return U*sqrt(-2.0*log(S)/S); + return U*std::sqrt(-2.0*log(S)/S); } KOKKOS_INLINE_FUNCTION diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile index 3027c6a94b..b74192ef18 100644 --- a/lib/kokkos/algorithms/unit_tests/Makefile +++ b/lib/kokkos/algorithms/unit_tests/Makefile @@ -8,7 +8,7 @@ default: build_all echo "End Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) - CXX = $(KOKKOS_PATH)/config/nvcc_wrapper + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper else CXX = g++ endif @@ -21,8 +21,8 @@ include $(KOKKOS_PATH)/Makefile.kokkos KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests -TEST_TARGETS = -TARGETS = +TEST_TARGETS = +TARGETS = ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o @@ -49,16 +49,16 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) endif KokkosAlgorithms_UnitTest_Cuda: 
$(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Cuda + $(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Cuda KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Threads + $(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Threads KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_OpenMP + $(LINK) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_OpenMP KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) - $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Serial + $(LINK) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Serial test-cuda: KokkosAlgorithms_UnitTest_Cuda ./KokkosAlgorithms_UnitTest_Cuda @@ -76,7 +76,7 @@ build_all: $(TARGETS) test: $(TEST_TARGETS) -clean: kokkos-clean +clean: kokkos-clean rm -f *.o $(TARGETS) # Compilation rules @@ -84,6 +84,5 @@ clean: kokkos-clean %.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc - diff --git a/lib/kokkos/algorithms/unit_tests/TestCuda.cpp b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp index ba3938f497..710eeb8ada 100644 --- 
a/lib/kokkos/algorithms/unit_tests/TestCuda.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,12 +36,15 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#include +#include +#ifdef KOKKOS_ENABLE_CUDA + +#include #include #include @@ -49,8 +52,6 @@ #include -#ifdef KOKKOS_ENABLE_CUDA - #include #include @@ -105,6 +106,7 @@ CUDA_SORT_UNSIGNED(171) #undef CUDA_RANDOM_XORSHIFT1024 #undef CUDA_SORT_UNSIGNED } - +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTCUDA_PREVENT_LINK_ERROR() {} #endif /* #ifdef KOKKOS_ENABLE_CUDA */ diff --git a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp index f4d582d0bb..1e7ee68549 100644 --- a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,13 +36,16 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#include +#include +#ifdef KOKKOS_ENABLE_OPENMP + +#include #include //---------------------------------------------------------------------------- @@ -52,7 +55,6 @@ namespace Test { -#ifdef KOKKOS_ENABLE_OPENMP class openmp : public ::testing::Test { protected: static void SetUpTestCase() @@ -97,6 +99,8 @@ OPENMP_SORT_UNSIGNED(171) #undef OPENMP_RANDOM_XORSHIFT64 #undef OPENMP_RANDOM_XORSHIFT1024 #undef OPENMP_SORT_UNSIGNED -#endif } // namespace test +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {} +#endif diff --git a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp index c906b9f2cd..9cf02f74b4 100644 --- a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp @@ -295,7 +295,7 @@ struct test_random_scalar { parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result); //printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2); - double tolerance = 1.6*sqrt(1.0/num_draws); + double tolerance = 1.6*std::sqrt(1.0/num_draws); double mean_expect = 0.5*Kokkos::rand::max(); double variance_expect = 1.0/3.0*mean_expect*mean_expect; double mean_eps = mean_expect/(result.mean/num_draws/3)-1.0; @@ -321,7 +321,7 @@ struct test_random_scalar { typedef test_histogram1d_functor functor_type; parallel_reduce (HIST_DIM1D, functor_type (density_1d, num_draws), result); - double tolerance = 6*sqrt(1.0/HIST_DIM1D); + double tolerance = 6*std::sqrt(1.0/HIST_DIM1D); double 
mean_expect = 1.0*num_draws*3/HIST_DIM1D; double variance_expect = 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); double covariance_expect = -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D; @@ -354,7 +354,7 @@ struct test_random_scalar { typedef test_histogram3d_functor functor_type; parallel_reduce (HIST_DIM1D, functor_type (density_3d, num_draws), result); - double tolerance = 6*sqrt(1.0/HIST_DIM1D); + double tolerance = 6*std::sqrt(1.0/HIST_DIM1D); double mean_expect = 1.0*num_draws/HIST_DIM1D; double variance_expect = 1.0*num_draws/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); double covariance_expect = -1.0*num_draws/HIST_DIM1D/HIST_DIM1D; diff --git a/lib/kokkos/algorithms/unit_tests/TestSerial.cpp b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp index 6ac80cf73a..a1df93e07b 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSerial.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,11 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include +#ifdef KOKKOS_ENABLE_SERIAL + #include #include @@ -55,7 +58,6 @@ namespace Test { -#ifdef KOKKOS_ENABLE_SERIAL class serial : public ::testing::Test { protected: static void SetUpTestCase() @@ -93,7 +95,9 @@ SERIAL_SORT_UNSIGNED(171) #undef SERIAL_RANDOM_XORSHIFT1024 #undef SERIAL_SORT_UNSIGNED -#endif // KOKKOS_ENABLE_SERIAL } // namespace Test +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTSERIAL_PREVENT_LINK_ERROR() {} +#endif // KOKKOS_ENABLE_SERIAL diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp index 61ffa6f43a..04be98f1cc 100644 --- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -1,12 +1,12 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,12 +35,12 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER -#ifndef TESTSORT_HPP_ -#define TESTSORT_HPP_ +#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP +#define KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP #include #include @@ -212,7 +212,12 @@ void test_dynamic_view_sort(unsigned int n ) const size_t upper_bound = 2 * n ; typename KeyDynamicViewType::memory_pool - pool( memory_space() , 2 * n * sizeof(KeyType) ); + pool( memory_space() + , n * sizeof(KeyType) * 1.2 + , 500 /* min block size in bytes */ + , 30000 /* max block size in bytes */ + , 1000000 /* min superblock size in bytes */ + ); KeyDynamicViewType keys("Keys",pool,upper_bound); @@ -272,4 +277,4 @@ void test_sort(unsigned int N) } } -#endif /* TESTSORT_HPP_ */ +#endif /* KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP */ diff --git a/lib/kokkos/algorithms/unit_tests/TestThreads.cpp b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp index 36d438b643..08749779ff 100644 --- a/lib/kokkos/algorithms/unit_tests/TestThreads.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,11 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include +#ifdef KOKKOS_ENABLE_THREADS + #include #include @@ -55,7 +58,6 @@ namespace Test { -#ifdef KOKKOS_ENABLE_PTHREAD class threads : public ::testing::Test { protected: static void SetUpTestCase() @@ -107,7 +109,9 @@ THREADS_SORT_UNSIGNED(171) #undef THREADS_RANDOM_XORSHIFT1024 #undef THREADS_SORT_UNSIGNED -#endif } // namespace Test +#else +void KOKKOS_ALGORITHMS_UNITTESTS_TESTTHREADS_PREVENT_LINK_ERROR() {} +#endif diff --git a/lib/kokkos/benchmarks/bytes_and_flops/Makefile b/lib/kokkos/benchmarks/bytes_and_flops/Makefile index 6a1917a523..5ddf78f28e 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/Makefile +++ b/lib/kokkos/benchmarks/bytes_and_flops/Makefile @@ -7,7 +7,7 @@ default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ${KOKKOS_PATH}/config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper EXE = bytes_and_flops.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -22,7 +22,7 @@ CXXFLAGS = -O3 -g DEPFLAGS = -M LINK = ${CXX} -LINKFLAGS = +LINKFLAGS = OBJ = $(SRC:.cpp=.o) LIB = @@ -34,7 +34,7 @@ build: $(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) -clean: kokkos-clean +clean: kokkos-clean rm -f *.o *.cuda *.host # Compilation rules diff --git a/lib/kokkos/benchmarks/gather/Makefile b/lib/kokkos/benchmarks/gather/Makefile index fd1feab6fa..0ea9fb1dd2 100644 --- a/lib/kokkos/benchmarks/gather/Makefile +++ b/lib/kokkos/benchmarks/gather/Makefile @@ -7,7 +7,7 @@ default: build echo "Start Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) -CXX = ${KOKKOS_PATH}/config/nvcc_wrapper +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper EXE = gather.cuda KOKKOS_DEVICES = "Cuda,OpenMP" KOKKOS_ARCH = "SNB,Kepler35" @@ -22,7 +22,7 @@ CXXFLAGS = -O3 -g DEPFLAGS = -M LINK = ${CXX} -LINKFLAGS = +LINKFLAGS 
= OBJ = $(SRC:.cpp=.o) LIB = @@ -35,10 +35,10 @@ build: $(EXE) $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) -clean: kokkos-clean +clean: kokkos-clean rm -f *.o *.cuda *.host # Compilation rules -%.o:%.cpp $(KOKKOS_CPP_DEPENDS) gather_unroll.hpp gather.hpp +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) gather_unroll.hpp gather.hpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/cmake/KokkosConfig.cmake.in b/lib/kokkos/cmake/KokkosConfig.cmake.in new file mode 100644 index 0000000000..fc099a494c --- /dev/null +++ b/lib/kokkos/cmake/KokkosConfig.cmake.in @@ -0,0 +1,18 @@ +# - Config file for the Kokkos package +# It defines the following variables +# Kokkos_INCLUDE_DIRS - include directories for Kokkos +# Kokkos_LIBRARIES - libraries to link against + +# Compute paths +GET_FILENAME_COMPONENT(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) +SET(Kokkos_INCLUDE_DIRS "@CONF_INCLUDE_DIRS@") + +# Our library dependencies (contains definitions for IMPORTED targets) +IF(NOT TARGET kokkos AND NOT Kokkos_BINARY_DIR) + INCLUDE("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake") +ENDIF() + +# These are IMPORTED targets created by KokkosTargets.cmake +SET(Kokkos_LIBRARY_DIRS @INSTALL_LIB_DIR@) +SET(Kokkos_LIBRARIES @Kokkos_LIBRARIES_NAMES@) +SET(Kokkos_TPL_LIBRARIES @KOKKOS_LIBS@) diff --git a/lib/kokkos/cmake/Modules/FindHWLOC.cmake b/lib/kokkos/cmake/Modules/FindHWLOC.cmake new file mode 100644 index 0000000000..273dcb5c8a --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindHWLOC.cmake @@ -0,0 +1,20 @@ +#.rst: +# FindHWLOC +# ---------- +# +# Try to find HWLOC. 
+# +# The following variables are defined: +# +# HWLOC_FOUND - System has HWLOC +# HWLOC_INCLUDE_DIR - HWLOC include directory +# HWLOC_LIBRARIES - Libraries needed to use HWLOC + +find_path(HWLOC_INCLUDE_DIR hwloc.h) +find_library(HWLOC_LIBRARIES hwloc) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(HWLOC DEFAULT_MSG + HWLOC_INCLUDE_DIR HWLOC_LIBRARIES) + +mark_as_advanced(HWLOC_INCLUDE_DIR HWLOC_LIBRARIES) diff --git a/lib/kokkos/cmake/Modules/FindMemkind.cmake b/lib/kokkos/cmake/Modules/FindMemkind.cmake new file mode 100644 index 0000000000..245fb44c19 --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindMemkind.cmake @@ -0,0 +1,20 @@ +#.rst: +# FindMemkind +# ---------- +# +# Try to find Memkind. +# +# The following variables are defined: +# +# MEMKIND_FOUND - System has Memkind +# MEMKIND_INCLUDE_DIR - Memkind include directory +# MEMKIND_LIBRARIES - Libraries needed to use Memkind + +find_path(MEMKIND_INCLUDE_DIR memkind.h) +find_library(MEMKIND_LIBRARIES memkind) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Memkind DEFAULT_MSG + MEMKIND_INCLUDE_DIR MEMKIND_LIBRARIES) + +mark_as_advanced(MEMKIND_INCLUDE_DIR MEMKIND_LIBRARIES) diff --git a/lib/kokkos/cmake/Modules/FindQthreads.cmake b/lib/kokkos/cmake/Modules/FindQthreads.cmake new file mode 100644 index 0000000000..a254b0e996 --- /dev/null +++ b/lib/kokkos/cmake/Modules/FindQthreads.cmake @@ -0,0 +1,20 @@ +#.rst: +# FindQthreads +# ---------- +# +# Try to find Qthreads. 
+# +# The following variables are defined: +# +# QTHREADS_FOUND - System has Qthreads +# QTHREADS_INCLUDE_DIR - Qthreads include directory +# QTHREADS_LIBRARIES - Libraries needed to use Qthreads + +find_path(QTHREADS_INCLUDE_DIR qthread.h) +find_library(QTHREADS_LIBRARIES qthread) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(Qthreads DEFAULT_MSG + QTHREADS_INCLUDE_DIR QTHREADS_LIBRARIES) + +mark_as_advanced(QTHREADS_INCLUDE_DIR QTHREADS_LIBRARIES) diff --git a/lib/kokkos/cmake/kokkos.cmake b/lib/kokkos/cmake/kokkos.cmake new file mode 100644 index 0000000000..235b7eaba4 --- /dev/null +++ b/lib/kokkos/cmake/kokkos.cmake @@ -0,0 +1,1198 @@ + + +# Set which Kokkos backend to use. +set(KOKKOS_ENABLE_CUDA OFF CACHE BOOL "Use Kokkos CUDA backend") +set(KOKKOS_ENABLE_OPENMP ON CACHE BOOL "Use Kokkos OpenMP backend") +set(KOKKOS_ENABLE_PTHREAD OFF CACHE BOOL "Use Kokkos Pthreads backend") +set(KOKKOS_ENABLE_QTHREADS OFF CACHE BOOL "Use Kokkos Qthreads backend") +set(KOKKOS_ENABLE_SERIAL ON CACHE BOOL "Use Kokkos Serial backend") + +# List of possible host architectures. +list(APPEND KOKKOS_HOST_ARCH_LIST + None # No architecture optimization + AMDAVX # AMD chip + ARMv80 # ARMv8.0 Compatible CPU + ARMv81 # ARMv8.1 Compatible CPU + ARMv8-ThunderX # ARMv8 Cavium ThunderX CPU + SNB # Intel Sandy/Ivy Bridge CPUs + HSW # Intel Haswell CPUs + BDW # Intel Broadwell Xeon E-class CPUs + SKX # Intel Sky Lake Xeon E-class HPC CPUs (AVX512) + KNC # Intel Knights Corner Xeon Phi + KNL # Intel Knights Landing Xeon Phi + BGQ # IBM Blue Gene Q + Power7 # IBM POWER7 CPUs + Power8 # IBM POWER8 CPUs + Power9 # IBM POWER9 CPUs + ) + +# Setting this variable to a value other than "None" can improve host +# performance by turning on architecture specific code. +set(KOKKOS_HOST_ARCH "None" CACHE STRING "Optimize for specific host architecture.") +set_property(CACHE KOKKOS_HOST_ARCH PROPERTY STRINGS ${KOKKOS_HOST_ARCH_LIST}) + +# List of possible GPU architectures. 
# GPU architectures accepted by KOKKOS_GPU_ARCH.
list(APPEND KOKKOS_GPU_ARCH_LIST
     None       # No architecture optimization
     Kepler     # NVIDIA Kepler default (generation CC 3.5)
     Kepler30   # NVIDIA Kepler generation CC 3.0
     Kepler32   # NVIDIA Kepler generation CC 3.2
     Kepler35   # NVIDIA Kepler generation CC 3.5
     Kepler37   # NVIDIA Kepler generation CC 3.7
     Maxwell    # NVIDIA Maxwell default (generation CC 5.0)
     Maxwell50  # NVIDIA Maxwell generation CC 5.0
     Maxwell52  # NVIDIA Maxwell generation CC 5.2
     Maxwell53  # NVIDIA Maxwell generation CC 5.3
     Pascal60   # NVIDIA Pascal generation CC 6.0
     Pascal61   # NVIDIA Pascal generation CC 6.1
    )

# Setting this variable to a value other than "None" can improve GPU
# performance by turning on architecture specific code.
set(KOKKOS_GPU_ARCH "None" CACHE STRING "Optimize for specific GPU architecture.")
set_property(CACHE KOKKOS_GPU_ARCH PROPERTY STRINGS ${KOKKOS_GPU_ARCH_LIST})

set(KOKKOS_SEPARATE_LIBS OFF CACHE BOOL "OFF = kokkos. ON = kokkoscore, kokkoscontainers, and kokkosalgorithms.")

# Enable hwloc library.
set(KOKKOS_ENABLE_HWLOC OFF CACHE BOOL "Enable hwloc for better process placement.")
set(KOKKOS_HWLOC_DIR "" CACHE PATH "Location of hwloc library.")

# Enable memkind library.
set(KOKKOS_ENABLE_MEMKIND OFF CACHE BOOL "Enable memkind.")
set(KOKKOS_MEMKIND_DIR "" CACHE PATH "Location of memkind library.")

set(KOKKOS_ENABLE_LIBRT OFF CACHE BOOL "Enable librt for more precise timer.")

# Enable debugging.
set(KOKKOS_DEBUG OFF CACHE BOOL "Enable debugging in Kokkos.")

# Enable profiling.
set(KOKKOS_ENABLE_PROFILING ON CACHE BOOL "Enable profiling.")

# Enable aggressive vectorization.
set(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION OFF CACHE BOOL "Enable aggressive vectorization.")

# Qthreads options.
set(KOKKOS_QTHREADS_DIR "" CACHE PATH "Location of Qthreads library.")

# CUDA options.
set(KOKKOS_CUDA_DIR "" CACHE PATH "Location of CUDA library. Defaults to where nvcc installed.")
set(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC OFF CACHE BOOL "Enable CUDA LDG.")
set(KOKKOS_ENABLE_CUDA_UVM OFF CACHE BOOL "Enable CUDA unified virtual memory.")
set(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE OFF CACHE BOOL "Enable relocatable device code for CUDA.")
set(KOKKOS_ENABLE_CUDA_LAMBDA ON CACHE BOOL "Enable lambdas for CUDA.")

################################### FUNCTIONS ##################################

# Sets the following compiler variables that are analogous to the CMAKE_*
# versions.  We add the ability to detect NVCC (really nvcc_wrapper).
#   KOKKOS_CXX_COMPILER
#   KOKKOS_CXX_COMPILER_ID
#   KOKKOS_CXX_COMPILER_VERSION
#
# Also verifies the compiler version meets the minimum required by Kokkos,
# that C++ extensions are off for nvcc_wrapper, and (when KOKKOS_ENABLE_CUDA is
# set) that the compiler is nvcc_wrapper or Clang 4.0.0+.  Any violation ends
# the configure with FATAL_ERROR.
function(set_kokkos_cxx_compiler)
  # Since CMake doesn't recognize the nvcc compiler until 3.8, we use our own
  # version of the CMake variables and detect nvcc ourselves.  Initially set to
  # the CMake variable values.
  set(INTERNAL_CXX_COMPILER ${CMAKE_CXX_COMPILER})
  set(INTERNAL_CXX_COMPILER_ID ${CMAKE_CXX_COMPILER_ID})
  set(INTERNAL_CXX_COMPILER_VERSION ${CMAKE_CXX_COMPILER_VERSION})

  # Check if the compiler is nvcc (which really means nvcc_wrapper).  The
  # pipeline yields the number of '--version' lines that mention nvcc.
  execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version
                  COMMAND grep nvcc
                  COMMAND wc -l
                  OUTPUT_VARIABLE INTERNAL_HAVE_COMPILER_NVCC
                  OUTPUT_STRIP_TRAILING_WHITESPACE)

  # Strip leading blanks from the count.  The input is quoted so that an empty
  # result (e.g. the pipeline could not run) is still passed as an argument
  # instead of vanishing and breaking the string() call.
  string(REGEX REPLACE "^ +" ""
         INTERNAL_HAVE_COMPILER_NVCC "${INTERNAL_HAVE_COMPILER_NVCC}")

  if(INTERNAL_HAVE_COMPILER_NVCC)
    # Set the compiler id to nvcc.  We use the value used by CMake 3.8.
    set(INTERNAL_CXX_COMPILER_ID NVIDIA)

    # Set nvcc's compiler version.
    execute_process(COMMAND ${INTERNAL_CXX_COMPILER} --version
                    COMMAND grep release
                    OUTPUT_VARIABLE INTERNAL_CXX_COMPILER_VERSION
                    OUTPUT_STRIP_TRAILING_WHITESPACE)

    # Keep only the trailing "major.minor.patch".  A literal dot has to be
    # written '\\.' here: CMake consumes one backslash while parsing the quoted
    # argument, and '\.' on its own is not a valid CMake escape sequence.
    string(REGEX MATCH "[0-9]+\\.[0-9]+\\.[0-9]+$"
           INTERNAL_CXX_COMPILER_VERSION "${INTERNAL_CXX_COMPILER_VERSION}")
  endif()

  # Enforce the minimum compilers supported by Kokkos.
  set(KOKKOS_MESSAGE_TEXT "Compiler not supported by Kokkos. Required compiler versions:")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Clang 3.5.2 or higher")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n GCC 4.7.2 or higher")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Intel 14.0.4 or higher")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n NVCC 7.0.28 or higher")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n PGI 17.1 or higher\n")

  if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 3.5.2)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL GNU)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.7.2)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL Intel)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 14.0.4)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 7.0.28)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  elseif(INTERNAL_CXX_COMPILER_ID STREQUAL PGI)
    if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 17.1)
      message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
    endif()
  endif()

  # Enforce that extensions are turned off for nvcc_wrapper.
  if(INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA)
    if(NOT DEFINED CMAKE_CXX_EXTENSIONS OR CMAKE_CXX_EXTENSIONS STREQUAL ON)
      message(FATAL_ERROR "NVCC doesn't support C++ extensions. Set CMAKE_CXX_EXTENSIONS to OFF in your CMakeLists.txt.")
    endif()
  endif()

  if(KOKKOS_ENABLE_CUDA)
    # Enforce that the compiler can compile CUDA code.
    if(INTERNAL_CXX_COMPILER_ID STREQUAL Clang)
      if(INTERNAL_CXX_COMPILER_VERSION VERSION_LESS 4.0.0)
        message(FATAL_ERROR "Compiling CUDA code directly with Clang requires version 4.0.0 or higher.")
      endif()
    elseif(NOT INTERNAL_CXX_COMPILER_ID STREQUAL NVIDIA)
      message(FATAL_ERROR "Invalid compiler for CUDA. The compiler must be nvcc_wrapper or Clang.")
    endif()
  endif()

  # Publish the results to the caller.
  set(KOKKOS_CXX_COMPILER ${INTERNAL_CXX_COMPILER} PARENT_SCOPE)
  set(KOKKOS_CXX_COMPILER_ID ${INTERNAL_CXX_COMPILER_ID} PARENT_SCOPE)
  set(KOKKOS_CXX_COMPILER_VERSION ${INTERNAL_CXX_COMPILER_VERSION} PARENT_SCOPE)
endfunction()

# Transitively enforces that the appropriate CXX standard compile flags (C++11
# or above) are added to targets that use the Kokkos library.  Compile features
# are used if possible.  Otherwise, the appropriate flags are added to
# KOKKOS_CXX_FLAGS.  Values set by the user to CMAKE_CXX_STANDARD and
# CMAKE_CXX_EXTENSIONS are honored.
#
# Reads KOKKOS_CXX_COMPILER_ID from the calling scope.  Sets exactly one of
# KOKKOS_CXX11_FEATURES or KOKKOS_CXX_FLAGS in the calling scope.
function(set_kokkos_compiler_standard)
  # The following table lists the versions of CMake that supports CXX_STANDARD
  # and the CXX compile features for different compilers.  The versions are
  # based on CMake documentation, looking at CMake code, and verifying by
  # testing with specific CMake versions.
  #
  #   COMPILER                      CXX_STANDARD     Compile Features
  #   ---------------------------------------------------------------
  #   Clang                             3.1                3.1
  #   GNU                               3.1                3.2
  #   AppleClang                        3.2                3.2
  #   Intel                             3.6                3.6
  #   Cray                              No                 No
  #   PGI                               No                 No
  #   XL                                No                 No
  #
  # For compiling CUDA code using nvcc_wrapper, we will use the host compiler's
  # flags for turning on C++11.  Since for compiler ID and versioning purposes
  # CMake recognizes the host compiler when calling nvcc_wrapper, this just
  # works.  Both NVCC and nvcc_wrapper only recognize '-std=c++11' which means
  # that we can only use host compilers for CUDA builds that use those flags.
  # It also means that extensions (gnu++11) can't be turned on for CUDA builds.

  # Start from a known state so a same-named variable in the calling scope
  # cannot switch compile features on by accident.
  set(INTERNAL_USE_COMPILE_FEATURES OFF)

  # Check if we can use compile features.
  if(NOT KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
    if(CMAKE_CXX_COMPILER_ID STREQUAL Clang)
      if(NOT CMAKE_VERSION VERSION_LESS 3.1)
        set(INTERNAL_USE_COMPILE_FEATURES ON)
      endif()
    elseif(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang OR CMAKE_CXX_COMPILER_ID STREQUAL GNU)
      if(NOT CMAKE_VERSION VERSION_LESS 3.2)
        set(INTERNAL_USE_COMPILE_FEATURES ON)
      endif()
    elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
      if(NOT CMAKE_VERSION VERSION_LESS 3.6)
        set(INTERNAL_USE_COMPILE_FEATURES ON)
      endif()
    endif()
  endif()

  if(INTERNAL_USE_COMPILE_FEATURES)
    # Use the compile features aspect of CMake to transitively cause C++ flags
    # to populate to user code.

    # I'm using a hack by requiring features that I know force the lowest version
    # of the compilers we want to support.  Clang 3.3 and later support all of
    # the C++11 standard.  With CMake 3.8 and higher, we could switch to using
    # cxx_std_11.
    set(KOKKOS_CXX11_FEATURES
        cxx_nonstatic_member_init  # Forces GCC 4.7 or later and Intel 14.0 or later.
        PARENT_SCOPE
       )
  else()
    # CXX compile features are not yet implemented for this combination of
    # compiler and version of CMake.

    if(CMAKE_CXX_COMPILER_ID STREQUAL AppleClang)
      # Versions of CMAKE before 3.2 don't support CXX_STANDARD or C++ compile
      # features for the AppleClang compiler.  Set compiler flags transitively
      # here such that they trickle down to a call to target_compile_options().

      # The following two blocks of code were copied from
      # /Modules/Compiler/AppleClang-CXX.cmake from CMake 3.7.2 and then
      # modified.
      if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.0)
        set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11")
        set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=gnu++11")
      endif()

      if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 6.1)
        set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++14")
        set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++14")
      elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.1)
        # AppleClang 5.0 knows this flag, but does not set a __cplusplus macro
        # greater than 201103L.
        set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++1y")
        set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=gnu++1y")
      endif()
    elseif(CMAKE_CXX_COMPILER_ID STREQUAL Intel)
      # Versions of CMAKE before 3.6 don't support CXX_STANDARD or C++ compile
      # features for the Intel compiler.  Set compiler flags transitively here
      # such that they trickle down to a call to target_compile_options().

      # The following three blocks of code were copied from
      # /Modules/Compiler/Intel-CXX.cmake from CMake 3.7.2 and then modified.
      if("x${CMAKE_CXX_SIMULATE_ID}" STREQUAL "xMSVC")
        set(_std -Qstd)
        set(_ext c++)
      else()
        set(_std -std)
        set(_ext gnu++)
      endif()

      if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.2)
        set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++14")
        # TODO: There is no gnu++14 value supported; figure out what to do.
        set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++14")
      elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 15.0.0)
        set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "${_std}=c++1y")
        # TODO: There is no gnu++14 value supported; figure out what to do.
        set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "${_std}=c++1y")
      endif()

      if(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 13.0)
        set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++11")
        set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}11")
      elseif(NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12.1)
        set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "${_std}=c++0x")
        set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "${_std}=${_ext}0x")
      endif()
    elseif(CMAKE_CXX_COMPILER_ID STREQUAL Cray)
      # CMAKE doesn't support CXX_STANDARD or C++ compile features for the Cray
      # compiler.  Set compiler options transitively here such that they trickle
      # down to a call to target_compile_options().
      # NOTE(review): the C++14 slots below deliberately(?) carry the C++11
      # flag, so CMAKE_CXX_STANDARD=14 still yields C++11 here - confirm.
      set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-hstd=c++11")
      set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-hstd=c++11")
      set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-hstd=c++11")
      set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-hstd=c++11")
    elseif(CMAKE_CXX_COMPILER_ID STREQUAL PGI)
      # CMAKE doesn't support CXX_STANDARD or C++ compile features for the PGI
      # compiler.  Set compiler options transitively here such that they trickle
      # down to a call to target_compile_options().
      set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "--c++11")
      set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "--c++11")
      set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "--c++11")
      set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "--c++11")
    elseif(CMAKE_CXX_COMPILER_ID STREQUAL XL)
      # CMAKE doesn't support CXX_STANDARD or C++ compile features for the XL
      # compiler.  Set compiler options transitively here such that they trickle
      # down to a call to target_compile_options().
      set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION "-std=c++11")
      set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION "-std=c++11")
      set(INTERNAL_CXX14_STANDARD_COMPILE_OPTION "-std=c++11")
      set(INTERNAL_CXX14_EXTENSION_COMPILE_OPTION "-std=c++11")
    else()
      # Assume GNU.  CMAKE_CXX_STANDARD is handled correctly by CMake 3.1 and
      # above for this compiler.  If the user explicitly requests a C++
      # standard, CMake takes care of it.  If not, transitively require C++11.
      if(NOT CMAKE_CXX_STANDARD)
        set(INTERNAL_CXX11_STANDARD_COMPILE_OPTION ${CMAKE_CXX11_STANDARD_COMPILE_OPTION})
        set(INTERNAL_CXX11_EXTENSION_COMPILE_OPTION ${CMAKE_CXX11_EXTENSION_COMPILE_OPTION})
      endif()
    endif()

    # Set the C++ standard info for Kokkos respecting user set values for
    # CMAKE_CXX_STANDARD and CMAKE_CXX_EXTENSIONS.  Extensions are used unless
    # CMAKE_CXX_EXTENSIONS is explicitly OFF.
    if(CMAKE_CXX_STANDARD EQUAL 14)
      if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL OFF)
        set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_STANDARD_COMPILE_OPTION})
      else()
        set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX14_EXTENSION_COMPILE_OPTION})
      endif()
    elseif(CMAKE_CXX_STANDARD EQUAL 11)
      if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL OFF)
        set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION})
      else()
        set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION})
      endif()
    else()
      # The user didn't explicitly request a standard, transitively require
      # C++11 respecting CMAKE_CXX_EXTENSIONS.
      if(DEFINED CMAKE_CXX_EXTENSIONS AND CMAKE_CXX_EXTENSIONS STREQUAL OFF)
        set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_STANDARD_COMPILE_OPTION})
      else()
        set(INTERNAL_CXX_FLAGS ${INTERNAL_CXX11_EXTENSION_COMPILE_OPTION})
      endif()
    endif()

    set(KOKKOS_CXX_FLAGS ${INTERNAL_CXX_FLAGS} PARENT_SCOPE)
  endif()
endfunction()

########################## COMPILER AND FEATURE CHECKS #########################

# TODO: We are assuming that nvcc_wrapper is using g++ as the host compiler.
#       Should we allow the user the option to change this?  The host compiler
#       for nvcc_wrapper can be set via the NVCC_WRAPPER_DEFAULT_COMPILER
#       environment variable or by passing a different host compiler with the
#       -ccbin flag.

# TODO: Fully add CUDA support for Clang.
# Run the compiler detection first; the standard selection reads the
# KOKKOS_CXX_COMPILER_ID it publishes.
set_kokkos_cxx_compiler()
set_kokkos_compiler_standard()

######################### INITIALIZE INTERNAL VARIABLES ########################

# Make the Find modules shipped with Kokkos visible to find_package().
list(APPEND CMAKE_MODULE_PATH "${Kokkos_SOURCE_DIR}/cmake/Modules/")

# Every internal cache flag starts out OFF on each configure, so a value left
# in the cache by an earlier run with different options cannot survive.
foreach(INTERNAL_FLAG
        # Backends and CUDA features
        KOKKOS_HAVE_CUDA
        KOKKOS_USE_CUDA_UVM
        KOKKOS_HAVE_CUDA_RDC
        KOKKOS_HAVE_CUDA_LAMBDA
        KOKKOS_CUDA_CLANG_WORKAROUND
        KOKKOS_HAVE_OPENMP
        KOKKOS_HAVE_PTHREAD
        KOKKOS_HAVE_QTHREADS
        KOKKOS_HAVE_SERIAL
        # Optional libraries and debugging
        KOKKOS_HAVE_HWLOC
        KOKKOS_ENABLE_HBWSPACE
        KOKKOS_HAVE_DEBUG
        KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK
        # Instruction set families
        KOKKOS_ENABLE_ISA_X86_64
        KOKKOS_ENABLE_ISA_KNC
        KOKKOS_ENABLE_ISA_POWERPCLE
        # Host architectures
        KOKKOS_ARCH_ARMV80
        KOKKOS_ARCH_ARMV81
        KOKKOS_ARCH_ARMV8_THUNDERX
        KOKKOS_ARCH_AVX
        KOKKOS_ARCH_AVX2
        KOKKOS_ARCH_AVX512MIC
        KOKKOS_ARCH_AVX512XEON
        KOKKOS_ARCH_KNC
        KOKKOS_ARCH_POWER8
        KOKKOS_ARCH_POWER9
        # GPU architectures
        KOKKOS_ARCH_KEPLER
        KOKKOS_ARCH_KEPLER30
        KOKKOS_ARCH_KEPLER32
        KOKKOS_ARCH_KEPLER35
        KOKKOS_ARCH_KEPLER37
        KOKKOS_ARCH_MAXWELL)
  set(${INTERNAL_FLAG} OFF CACHE INTERNAL "")
endforeach()
# Reset the remaining GPU architecture cache flags to OFF on every configure.
foreach(INTERNAL_FLAG
        KOKKOS_ARCH_MAXWELL50
        KOKKOS_ARCH_MAXWELL52
        KOKKOS_ARCH_MAXWELL53
        KOKKOS_ARCH_PASCAL
        KOKKOS_ARCH_PASCAL60
        KOKKOS_ARCH_PASCAL61)
  set(${INTERNAL_FLAG} OFF CACHE INTERNAL "")
endforeach()

############################## SET BACKEND OPTIONS #############################

# At least one execution space has to be enabled.
if(NOT (KOKKOS_ENABLE_CUDA OR KOKKOS_ENABLE_OPENMP OR KOKKOS_ENABLE_PTHREAD OR
        KOKKOS_ENABLE_QTHREADS OR KOKKOS_ENABLE_SERIAL))
  message(FATAL_ERROR "Must set one of KOKKOS_ENABLE_CUDA, KOKKOS_ENABLE_OPENMP, KOKKOS_ENABLE_PTHREAD, KOKKOS_ENABLE_QTHREADS, or KOKKOS_ENABLE_SERIAL")
endif()

# The host-parallel backends are mutually exclusive: reject any pair.
set(KOKKOS_MESSAGE_TEXT "Only one of KOKKOS_ENABLE_OPENMP, KOKKOS_ENABLE_PTHREAD, and KOKKOS_ENABLE_QTHREADS can be selected")
if((KOKKOS_ENABLE_OPENMP AND KOKKOS_ENABLE_PTHREAD) OR
   (KOKKOS_ENABLE_OPENMP AND KOKKOS_ENABLE_QTHREADS) OR
   (KOKKOS_ENABLE_PTHREAD AND KOKKOS_ENABLE_QTHREADS))
  message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
endif()

# Collect the backend-independent sources.
file(GLOB KOKKOS_CORE_SRCS core/src/impl/*.cpp)
file(GLOB KOKKOS_CONTAINERS_SRCS containers/src/impl/*.cpp)

# CUDA backend.
if(KOKKOS_ENABLE_CUDA)
  # A user supplied toolkit location takes precedence in the CUDA search.
  if(KOKKOS_CUDA_DIR)
    set(CUDA_TOOLKIT_ROOT_DIR ${KOKKOS_CUDA_DIR})
  endif()

  find_package(CUDA)

  # Tailor the failure message to whether the user pointed us somewhere.
  if(NOT CUDA_FOUND AND KOKKOS_CUDA_DIR)
    message(FATAL_ERROR "Couldn't find CUDA in default locations, and KOKKOS_CUDA_DIR points to an invalid installation.")
  elseif(NOT CUDA_FOUND)
    message(FATAL_ERROR "Couldn't find CUDA in default locations. Set KOKKOS_CUDA_DIR.")
  endif()

  set(KOKKOS_HAVE_CUDA ON CACHE INTERNAL "")

  # Headers, library search path and libraries of the toolkit.
  list(APPEND KOKKOS_INCLUDE_DIRS ${CUDA_INCLUDE_DIRS})
  list(APPEND KOKKOS_LD_FLAGS -L${CUDA_TOOLKIT_ROOT_DIR}/lib64)
  list(APPEND KOKKOS_LIBS cudart cuda)

  # CUDA backend sources join the core library.
  file(GLOB KOKKOS_CUDA_SRCS core/src/Cuda/*.cpp)
  list(APPEND KOKKOS_CORE_SRCS ${KOKKOS_CUDA_SRCS})

  # Unified virtual memory, if requested.
  if(KOKKOS_ENABLE_CUDA_UVM)
    set(KOKKOS_USE_CUDA_UVM ON CACHE INTERNAL "")
  endif()

  # Relocatable device code, if requested; needed at compile and link time.
  if(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
    set(KOKKOS_HAVE_CUDA_RDC ON CACHE INTERNAL "")
    foreach(INTERNAL_FLAG_LIST KOKKOS_CXX_FLAGS KOKKOS_LD_FLAGS)
      list(APPEND ${INTERNAL_FLAG_LIST} --relocatable-device-code=true)
    endforeach()
  endif()

  # Device lambdas, if requested.  Only nvcc needs (and version-gates) a flag.
  if(KOKKOS_ENABLE_CUDA_LAMBDA)
    set(KOKKOS_HAVE_CUDA_LAMBDA ON CACHE INTERNAL "")

    if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
      if(NOT KOKKOS_CXX_COMPILER_VERSION VERSION_LESS 7.5)
        list(APPEND KOKKOS_CXX_FLAGS -expt-extended-lambda)
      else()
        message(FATAL_ERROR "CUDA lambda support requires CUDA 7.5 or higher. Disable it or use a 7.5 or later compiler.")
      endif()
    endif()
  endif()

  # Clang compiling CUDA directly: point it at the toolkit, enable the Clang
  # workaround flag, and force the LDG intrinsic option on in the cache.
  if(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
    list(APPEND KOKKOS_CXX_FLAGS --cuda-path=${CUDA_TOOLKIT_ROOT_DIR})
    set(KOKKOS_CUDA_CLANG_WORKAROUND ON CACHE INTERNAL "")
    set(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC ON CACHE BOOL "Enable CUDA LDG." FORCE)
  endif()
endif()

# Set options if using OpenMP backend.
# OpenMP backend: add the OpenMP compile/link flags and backend sources.
if(KOKKOS_ENABLE_OPENMP)
  find_package(OpenMP REQUIRED)

  if(OPENMP_FOUND)
    # nvcc_wrapper has to be told to hand the next flag to the host compiler.
    if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
      list(APPEND KOKKOS_CXX_FLAGS -Xcompiler)
    endif()

    list(APPEND KOKKOS_CXX_FLAGS ${OpenMP_CXX_FLAGS})
    list(APPEND KOKKOS_LD_FLAGS ${OpenMP_CXX_FLAGS})
  endif()

  set(KOKKOS_HAVE_OPENMP ON CACHE INTERNAL "")
  file(GLOB KOKKOS_OPENMP_SRCS core/src/OpenMP/*.cpp)
  list(APPEND KOKKOS_CORE_SRCS ${KOKKOS_OPENMP_SRCS})
endif()

# Set options if using Pthreads backend.
if(KOKKOS_ENABLE_PTHREAD)
  find_package(Threads REQUIRED)

  list(APPEND KOKKOS_LIBS Threads::Threads)

  set(KOKKOS_HAVE_PTHREAD ON CACHE INTERNAL "")
  file(GLOB KOKKOS_PTHREAD_SRCS core/src/Threads/*.cpp)
  list(APPEND KOKKOS_CORE_SRCS ${KOKKOS_PTHREAD_SRCS})
endif()

# Set options if using Qthreads backend.
if(KOKKOS_ENABLE_QTHREADS)
  # Temporarily extend the search prefix with the user supplied location; it
  # is popped again once the package has been looked up.
  if(KOKKOS_QTHREADS_DIR)
    list(APPEND CMAKE_PREFIX_PATH ${KOKKOS_QTHREADS_DIR})
  endif()

  find_package(Qthreads)

  if(NOT QTHREADS_FOUND)
    if(KOKKOS_QTHREADS_DIR)
      message(FATAL_ERROR "Couldn't find Qthreads in default locations, and KOKKOS_QTHREADS_DIR points to an invalid installation.")
    else()
      message(FATAL_ERROR "Couldn't find Qthreads in default locations. Set KOKKOS_QTHREADS_DIR.")
    endif()
  endif()

  list(APPEND KOKKOS_INCLUDE_DIRS ${QTHREADS_INCLUDE_DIR})
  list(APPEND KOKKOS_LIBS ${QTHREADS_LIBRARIES})

  set(KOKKOS_HAVE_QTHREADS ON CACHE INTERNAL "")
  # Pick up the Qthreads backend's own sources rather than the Pthreads
  # backend directory (core/src/Threads).
  # NOTE(review): assumes the Qthreads sources live in core/src/Qthreads -
  # confirm against the source tree.
  file(GLOB KOKKOS_QTHREADS_SRCS core/src/Qthreads/*.cpp)
  list(APPEND KOKKOS_CORE_SRCS ${KOKKOS_QTHREADS_SRCS})

  if(KOKKOS_QTHREADS_DIR)
    list(REMOVE_AT CMAKE_PREFIX_PATH -1)
  endif()
endif()

# Set options if using Serial backend.
if(KOKKOS_ENABLE_SERIAL)
  set(KOKKOS_HAVE_SERIAL ON CACHE INTERNAL "")
else()
  # Remove serial source files.
  list(REMOVE_ITEM KOKKOS_CORE_SRCS
       "${Kokkos_SOURCE_DIR}/core/src/impl/Kokkos_Serial.cpp"
       "${Kokkos_SOURCE_DIR}/core/src/impl/Kokkos_Serial_Task.cpp")
endif()

########################### SET ARCHITECTURE OPTIONS ###########################

# Make sure the host architecture option is valid.  Need to verify in case user
# passes the option via the command line.  On failure the message lists every
# accepted choice on its own line.
list(FIND KOKKOS_HOST_ARCH_LIST "${KOKKOS_HOST_ARCH}" KOKKOS_VALID_HOST_ARCH)
if(KOKKOS_VALID_HOST_ARCH EQUAL -1)
  set(KOKKOS_ARCH_TEXT "\n ${KOKKOS_HOST_ARCH_LIST}")
  string(REPLACE ";" "\n " KOKKOS_ARCH_TEXT "${KOKKOS_ARCH_TEXT}")
  set(KOKKOS_MESSAGE_TEXT "Invalid architecture for KOKKOS_HOST_ARCH: '${KOKKOS_HOST_ARCH}'")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Choices:${KOKKOS_ARCH_TEXT}\n")
  message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
endif()

# Make sure the GPU architecture option is valid.  Need to verify in case user
# passes the option via the command line.
list(FIND KOKKOS_GPU_ARCH_LIST "${KOKKOS_GPU_ARCH}" KOKKOS_VALID_GPU_ARCH)
if(KOKKOS_VALID_GPU_ARCH EQUAL -1)
  set(KOKKOS_ARCH_TEXT "\n ${KOKKOS_GPU_ARCH_LIST}")
  string(REPLACE ";" "\n " KOKKOS_ARCH_TEXT "${KOKKOS_ARCH_TEXT}")
  set(KOKKOS_MESSAGE_TEXT "Invalid architecture for KOKKOS_GPU_ARCH: '${KOKKOS_GPU_ARCH}'")
  set(KOKKOS_MESSAGE_TEXT "${KOKKOS_MESSAGE_TEXT}\n Choices:${KOKKOS_ARCH_TEXT}\n")
  message(FATAL_ERROR "${KOKKOS_MESSAGE_TEXT}")
endif()

# Decide what ISA level we are able to support.
if(KOKKOS_HOST_ARCH STREQUAL SNB OR KOKKOS_HOST_ARCH STREQUAL HSW OR KOKKOS_HOST_ARCH STREQUAL BDW OR
   KOKKOS_HOST_ARCH STREQUAL SKX OR KOKKOS_HOST_ARCH STREQUAL KNL)
  set(KOKKOS_ENABLE_ISA_X86_64 ON CACHE INTERNAL "")
endif()

if(KOKKOS_HOST_ARCH STREQUAL KNC)
  set(KOKKOS_ENABLE_ISA_KNC ON CACHE INTERNAL "")
endif()

if(KOKKOS_HOST_ARCH STREQUAL Power8 OR KOKKOS_HOST_ARCH STREQUAL Power9)
  set(KOKKOS_ENABLE_ISA_POWERPCLE ON CACHE INTERNAL "")
endif()

# Add host architecture options.
# Host architecture handling is table driven: each architecture records its
# KOKKOS_ARCH_* cache flag(s) and fills in one flag set per compiler family
# (INTEL, CRAY, PGI, OTHER).  The set matching the active compiler is then
# appended to both the compile and the link flags.  An empty set means "no
# architecture flags for this compiler".

# Classify the active compiler once.
if(KOKKOS_CXX_COMPILER_ID STREQUAL Intel)
  set(INTERNAL_HOST_COMPILER_FAMILY INTEL)
elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Cray)
  set(INTERNAL_HOST_COMPILER_FAMILY CRAY)
elseif(KOKKOS_CXX_COMPILER_ID STREQUAL PGI)
  set(INTERNAL_HOST_COMPILER_FAMILY PGI)
else()
  set(INTERNAL_HOST_COMPILER_FAMILY OTHER)
endif()

# All flag sets start out empty.
foreach(INTERNAL_FAMILY INTEL CRAY PGI OTHER)
  set(INTERNAL_HOST_ARCH_FLAGS_${INTERNAL_FAMILY} "")
endforeach()

if(KOKKOS_HOST_ARCH STREQUAL ARMv80)
  set(KOKKOS_ARCH_ARMV80 ON CACHE INTERNAL "")
  set(INTERNAL_HOST_ARCH_FLAGS_INTEL -march=armv8-a)
  set(INTERNAL_HOST_ARCH_FLAGS_OTHER -march=armv8-a)
elseif(KOKKOS_HOST_ARCH STREQUAL ARMv81)
  set(KOKKOS_ARCH_ARMV81 ON CACHE INTERNAL "")
  set(INTERNAL_HOST_ARCH_FLAGS_INTEL -march=armv8.1-a)
  set(INTERNAL_HOST_ARCH_FLAGS_OTHER -march=armv8.1-a)
elseif(KOKKOS_HOST_ARCH STREQUAL ARMv8-ThunderX)
  # ThunderX turns on the generic ARMv8.0 flag as well as its own.
  set(KOKKOS_ARCH_ARMV80 ON CACHE INTERNAL "")
  set(KOKKOS_ARCH_ARMV8_THUNDERX ON CACHE INTERNAL "")
  set(INTERNAL_HOST_ARCH_FLAGS_INTEL -march=armv8-a -mtune=thunderx)
  set(INTERNAL_HOST_ARCH_FLAGS_OTHER -march=armv8-a -mtune=thunderx)
elseif(KOKKOS_HOST_ARCH STREQUAL SNB OR KOKKOS_HOST_ARCH STREQUAL AMDAVX)
  set(KOKKOS_ARCH_AVX ON CACHE INTERNAL "")
  set(INTERNAL_HOST_ARCH_FLAGS_INTEL -mavx)
  set(INTERNAL_HOST_ARCH_FLAGS_PGI -tp=sandybridge)
  set(INTERNAL_HOST_ARCH_FLAGS_OTHER -mavx)
elseif(KOKKOS_HOST_ARCH STREQUAL HSW OR KOKKOS_HOST_ARCH STREQUAL BDW)
  set(KOKKOS_ARCH_AVX2 ON CACHE INTERNAL "")
  set(INTERNAL_HOST_ARCH_FLAGS_INTEL -xCORE-AVX2)
  set(INTERNAL_HOST_ARCH_FLAGS_PGI -tp=haswell)
  set(INTERNAL_HOST_ARCH_FLAGS_OTHER -march=core-avx2 -mtune=core-avx2)
elseif(KOKKOS_HOST_ARCH STREQUAL KNL)
  set(KOKKOS_ARCH_AVX512MIC ON CACHE INTERNAL "")
  set(INTERNAL_HOST_ARCH_FLAGS_INTEL -xMIC-AVX512)
  set(INTERNAL_HOST_ARCH_FLAGS_OTHER -march=knl)
elseif(KOKKOS_HOST_ARCH STREQUAL SKX)
  set(KOKKOS_ARCH_AVX512XEON ON CACHE INTERNAL "")
  set(INTERNAL_HOST_ARCH_FLAGS_INTEL -xCORE-AVX512)
  set(INTERNAL_HOST_ARCH_FLAGS_OTHER -march=skylake-avx512)
elseif(KOKKOS_HOST_ARCH STREQUAL KNC)
  # KNC uses the same flag whatever the compiler is.
  set(KOKKOS_ARCH_KNC ON CACHE INTERNAL "")
  foreach(INTERNAL_FAMILY INTEL CRAY PGI OTHER)
    set(INTERNAL_HOST_ARCH_FLAGS_${INTERNAL_FAMILY} -mmic)
  endforeach()
elseif(KOKKOS_HOST_ARCH STREQUAL Power8)
  # On Power only PGI goes without flags.
  set(KOKKOS_ARCH_POWER8 ON CACHE INTERNAL "")
  foreach(INTERNAL_FAMILY INTEL CRAY OTHER)
    set(INTERNAL_HOST_ARCH_FLAGS_${INTERNAL_FAMILY} -mcpu=power8 -mtune=power8)
  endforeach()
elseif(KOKKOS_HOST_ARCH STREQUAL Power9)
  set(KOKKOS_ARCH_POWER9 ON CACHE INTERNAL "")
  foreach(INTERNAL_FAMILY INTEL CRAY OTHER)
    set(INTERNAL_HOST_ARCH_FLAGS_${INTERNAL_FAMILY} -mcpu=power9 -mtune=power9)
  endforeach()
endif()

# Apply the flag set selected for the active compiler, if there is one.
set(INTERNAL_HOST_ARCH_FLAGS ${INTERNAL_HOST_ARCH_FLAGS_${INTERNAL_HOST_COMPILER_FAMILY}})
if(NOT "${INTERNAL_HOST_ARCH_FLAGS}" STREQUAL "")
  list(APPEND KOKKOS_CXX_FLAGS ${INTERNAL_HOST_ARCH_FLAGS})
  list(APPEND KOKKOS_LD_FLAGS ${INTERNAL_HOST_ARCH_FLAGS})
endif()

# Drop the scratch variables again.
foreach(INTERNAL_FAMILY INTEL CRAY PGI OTHER)
  unset(INTERNAL_HOST_ARCH_FLAGS_${INTERNAL_FAMILY})
endforeach()
unset(INTERNAL_HOST_ARCH_FLAGS)
unset(INTERNAL_HOST_COMPILER_FAMILY)

# Add GPU architecture options.
# GPU architecture: build "<arch flag>=sm_XX" for the selected architecture and
# record the matching KOKKOS_ARCH_* cache flags.
if(KOKKOS_ENABLE_CUDA)
  # The spelling of the architecture flag depends on the CUDA compiler.
  if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
    set(KOKKOS_GPU_ARCH_FLAG -arch)
  elseif(KOKKOS_CXX_COMPILER_ID STREQUAL Clang)
    list(APPEND KOKKOS_CXX_FLAGS -x cuda)
    set(KOKKOS_GPU_ARCH_FLAG --cuda-gpu-arch)
  endif()

  # "Kepler" and "Maxwell" without a number select sm_35 and sm_50.
  if(KOKKOS_GPU_ARCH STREQUAL Kepler30)
    set(KOKKOS_ARCH_KEPLER ON CACHE INTERNAL "")
    set(KOKKOS_ARCH_KEPLER30 ON CACHE INTERNAL "")
    set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_30)
  elseif(KOKKOS_GPU_ARCH STREQUAL Kepler32)
    set(KOKKOS_ARCH_KEPLER ON CACHE INTERNAL "")
    set(KOKKOS_ARCH_KEPLER32 ON CACHE INTERNAL "")
    set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_32)
  elseif(KOKKOS_GPU_ARCH STREQUAL Kepler35 OR KOKKOS_GPU_ARCH STREQUAL Kepler)
    set(KOKKOS_ARCH_KEPLER ON CACHE INTERNAL "")
    set(KOKKOS_ARCH_KEPLER35 ON CACHE INTERNAL "")
    set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_35)
  elseif(KOKKOS_GPU_ARCH STREQUAL Kepler37)
    set(KOKKOS_ARCH_KEPLER ON CACHE INTERNAL "")
    set(KOKKOS_ARCH_KEPLER37 ON CACHE INTERNAL "")
    set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_37)
  elseif(KOKKOS_GPU_ARCH STREQUAL Maxwell50 OR KOKKOS_GPU_ARCH STREQUAL Maxwell)
    set(KOKKOS_ARCH_MAXWELL ON CACHE INTERNAL "")
    set(KOKKOS_ARCH_MAXWELL50 ON CACHE INTERNAL "")
    set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_50)
  elseif(KOKKOS_GPU_ARCH STREQUAL Maxwell52)
    set(KOKKOS_ARCH_MAXWELL ON CACHE INTERNAL "")
    set(KOKKOS_ARCH_MAXWELL52 ON CACHE INTERNAL "")
    set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_52)
  elseif(KOKKOS_GPU_ARCH STREQUAL Maxwell53)
    set(KOKKOS_ARCH_MAXWELL ON CACHE INTERNAL "")
    set(KOKKOS_ARCH_MAXWELL53 ON CACHE INTERNAL "")
    set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_53)
  elseif(KOKKOS_GPU_ARCH STREQUAL Pascal60)
    set(KOKKOS_ARCH_PASCAL ON CACHE INTERNAL "")
    set(KOKKOS_ARCH_PASCAL60 ON CACHE INTERNAL "")
    set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_60)
  elseif(KOKKOS_GPU_ARCH STREQUAL Pascal61)
    set(KOKKOS_ARCH_PASCAL ON CACHE INTERNAL "")
    set(KOKKOS_ARCH_PASCAL61 ON CACHE INTERNAL "")
    set(KOKKOS_GPU_ARCH_FLAG ${KOKKOS_GPU_ARCH_FLAG}=sm_61)
  endif()

  # Only pass the flag when an architecture was actually chosen; nvcc also
  # gets it at link time.
  if(NOT KOKKOS_GPU_ARCH STREQUAL None)
    list(APPEND KOKKOS_CXX_FLAGS ${KOKKOS_GPU_ARCH_FLAG})

    if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
      list(APPEND KOKKOS_LD_FLAGS ${KOKKOS_GPU_ARCH_FLAG})
    endif()
  endif()
endif()

############################### SET OTHER OPTIONS ##############################

# Set options if using hwloc.
if(KOKKOS_ENABLE_HWLOC)
  # Temporarily extend the search prefix with the user supplied location.
  if(KOKKOS_HWLOC_DIR)
    list(APPEND CMAKE_PREFIX_PATH ${KOKKOS_HWLOC_DIR})
  endif()

  find_package(HWLOC)

  if(NOT HWLOC_FOUND)
    if(KOKKOS_HWLOC_DIR)
      message(FATAL_ERROR "Couldn't find HWLOC in default locations, and KOKKOS_HWLOC_DIR points to an invalid installation.")
    else()
      message(FATAL_ERROR "Couldn't find HWLOC in default locations. Set KOKKOS_HWLOC_DIR.")
    endif()
  endif()

  list(APPEND KOKKOS_INCLUDE_DIRS ${HWLOC_INCLUDE_DIR})
  list(APPEND KOKKOS_LIBS ${HWLOC_LIBRARIES})

  set(KOKKOS_HAVE_HWLOC ON CACHE INTERNAL "")

  if(KOKKOS_HWLOC_DIR)
    list(REMOVE_AT CMAKE_PREFIX_PATH -1)
  endif()
endif()

# Set options if using memkind.
if(KOKKOS_ENABLE_MEMKIND)
  if(KOKKOS_MEMKIND_DIR)
    list(APPEND CMAKE_PREFIX_PATH ${KOKKOS_MEMKIND_DIR})
  endif()

  find_package(Memkind)

  if(NOT MEMKIND_FOUND)
    if(KOKKOS_MEMKIND_DIR)
      message(FATAL_ERROR "Couldn't find Memkind in default locations, and KOKKOS_MEMKIND_DIR points to an invalid installation.")
    else()
      message(FATAL_ERROR "Couldn't find Memkind in default locations. Set KOKKOS_MEMKIND_DIR.")
    endif()
  endif()

  set(KOKKOS_ENABLE_HBWSPACE ON CACHE INTERNAL "")
  list(APPEND KOKKOS_INCLUDE_DIRS ${MEMKIND_INCLUDE_DIR})
  list(APPEND KOKKOS_LIBS ${MEMKIND_LIBRARIES})

  if(KOKKOS_MEMKIND_DIR)
    list(REMOVE_AT CMAKE_PREFIX_PATH -1)
  endif()
else()
  # Remove HBW source file.
  list(REMOVE_ITEM KOKKOS_CORE_SRCS
       "${Kokkos_SOURCE_DIR}/core/src/impl/Kokkos_HBWSpace.cpp")
endif()

# Set options if using librt.
if(KOKKOS_ENABLE_LIBRT)
  list(APPEND KOKKOS_LIBS rt)
endif()

# Set debugging if requested.
if(KOKKOS_DEBUG)
  set(KOKKOS_HAVE_DEBUG ON CACHE INTERNAL "")
  set(KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ON CACHE INTERNAL "")

  # -lineinfo is an nvcc option, so it is only added for nvcc_wrapper builds.
  if(KOKKOS_CXX_COMPILER_ID STREQUAL NVIDIA)
    list(APPEND KOKKOS_CXX_FLAGS -lineinfo)
  endif()

  list(APPEND KOKKOS_CXX_FLAGS -g)
  list(APPEND KOKKOS_LD_FLAGS -g)
endif()

# Set profiling if requested.
if(KOKKOS_ENABLE_PROFILING)
  list(APPEND KOKKOS_LIBS dl)
else()
  # Remove profiling source file.
  list(REMOVE_ITEM KOKKOS_CORE_SRCS
       "${Kokkos_SOURCE_DIR}/core/src/impl/Kokkos_Profiling_Interface.cpp")
endif()

# Use GCC toolchain with Clang.  The toolchain root is the directory that
# contains bin/g++ for the first g++ found on the search path.
if(KOKKOS_CXX_COMPILER_ID STREQUAL Clang AND NOT APPLE)
  find_program(KOKKOS_GCC_PATH g++)
  if(NOT KOKKOS_GCC_PATH)
    message(FATAL_ERROR "Can't find GCC path to get toolchain for Clang.")
  endif()
  string(REPLACE "/bin/g++" "" KOKKOS_GCC_PATH ${KOKKOS_GCC_PATH})

  list(APPEND KOKKOS_CXX_FLAGS --gcc-toolchain=${KOKKOS_GCC_PATH})
  list(APPEND KOKKOS_LD_FLAGS --gcc-toolchain=${KOKKOS_GCC_PATH})
endif()

############################ Detect if submodule ###############################
#
# With thanks to StackOverflow:
#      http://stackoverflow.com/questions/25199677/how-to-detect-if-current-scope-has-a-parent-in-cmake
#
# When included from another project the headers are installed into their own
# 'kokkos' subdirectory.
get_directory_property(HAS_PARENT PARENT_DIRECTORY)
if(HAS_PARENT)
  message(STATUS "Submodule build")
  SET(KOKKOS_HEADER_DIR "include/kokkos")
else()
  message(STATUS "Standalone build")
  SET(KOKKOS_HEADER_DIR "include")
endif()

############################ PRINT CONFIGURE STATUS ############################

message(STATUS "")
message(STATUS "****************** Kokkos Settings ******************")
message(STATUS "Execution Spaces")

if(KOKKOS_ENABLE_CUDA)
  message(STATUS " Device Parallel: Cuda")
else()
  message(STATUS " Device Parallel: None")
endif()

if(KOKKOS_ENABLE_OPENMP)
  message(STATUS " Host Parallel: OpenMP")
elseif(KOKKOS_ENABLE_PTHREAD)
  message(STATUS " Host Parallel: Pthread")
elseif(KOKKOS_ENABLE_QTHREADS)
  message(STATUS " Host Parallel: Qthreads")
else()
  message(STATUS " Host Parallel: None")
endif()

if(KOKKOS_ENABLE_SERIAL)
  message(STATUS " Host Serial: Serial")
else()
  message(STATUS " Host Serial: None")
endif()

message(STATUS "")
message(STATUS "Architectures")
message(STATUS " Host Architecture: ${KOKKOS_HOST_ARCH}")
message(STATUS " Device Architecture: ${KOKKOS_GPU_ARCH}")

message(STATUS "")
message(STATUS "Enabled options")

if(KOKKOS_SEPARATE_LIBS)
  message(STATUS " KOKKOS_SEPARATE_LIBS")
endif()

if(KOKKOS_ENABLE_HWLOC)
  message(STATUS " KOKKOS_ENABLE_HWLOC")
endif()

if(KOKKOS_ENABLE_MEMKIND)
  message(STATUS " KOKKOS_ENABLE_MEMKIND")
endif()

if(KOKKOS_DEBUG)
  message(STATUS " KOKKOS_DEBUG")
endif()

if(KOKKOS_ENABLE_PROFILING)
  message(STATUS " KOKKOS_ENABLE_PROFILING")
endif()

if(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION)
  message(STATUS " KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION")
endif()

if(KOKKOS_ENABLE_CUDA)
  if(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC)
    message(STATUS " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
  endif()

  if(KOKKOS_ENABLE_CUDA_UVM)
    message(STATUS " KOKKOS_ENABLE_CUDA_UVM")
  endif()

  if(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
    message(STATUS " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
  endif()

  if(KOKKOS_ENABLE_CUDA_LAMBDA)
    message(STATUS " KOKKOS_ENABLE_CUDA_LAMBDA")
  endif()

  if(KOKKOS_CUDA_DIR)
    message(STATUS " KOKKOS_CUDA_DIR: ${KOKKOS_CUDA_DIR}")
  endif()
endif()

if(KOKKOS_QTHREADS_DIR)
  message(STATUS " KOKKOS_QTHREADS_DIR: ${KOKKOS_QTHREADS_DIR}")
endif()

if(KOKKOS_HWLOC_DIR)
  message(STATUS " KOKKOS_HWLOC_DIR: ${KOKKOS_HWLOC_DIR}")
endif()

if(KOKKOS_MEMKIND_DIR)
  message(STATUS " KOKKOS_MEMKIND_DIR: ${KOKKOS_MEMKIND_DIR}")
endif()

+message(STATUS "*****************************************************") +message(STATUS "") + +################################ SET UP PROJECT ################################ + +configure_file( + ${Kokkos_SOURCE_DIR}/core/cmake/KokkosCore_config.h.in + ${Kokkos_BINARY_DIR}/KokkosCore_config.h +) + +SET(INSTALL_LIB_DIR lib CACHE PATH "Installation directory for libraries") +SET(INSTALL_BIN_DIR bin CACHE PATH "Installation directory for executables") +SET(INSTALL_INCLUDE_DIR ${KOKKOS_HEADER_DIR} CACHE PATH + "Installation directory for header files") +IF(WIN32 AND NOT CYGWIN) + SET(DEF_INSTALL_CMAKE_DIR CMake) +ELSE() + SET(DEF_INSTALL_CMAKE_DIR lib/CMake/Kokkos) +ENDIF() + +SET(INSTALL_CMAKE_DIR ${DEF_INSTALL_CMAKE_DIR} CACHE PATH + "Installation directory for CMake files") + +# Make relative paths absolute (needed later on) +FOREACH(p LIB BIN INCLUDE CMAKE) + SET(var INSTALL_${p}_DIR) + IF(NOT IS_ABSOLUTE "${${var}}") + SET(${var} "${CMAKE_INSTALL_PREFIX}/${${var}}") + ENDIF() +ENDFOREACH() + +# set up include-directories +SET (Kokkos_INCLUDE_DIRS + ${Kokkos_SOURCE_DIR}/core/src + ${Kokkos_SOURCE_DIR}/containers/src + ${Kokkos_SOURCE_DIR}/algorithms/src + ${Kokkos_BINARY_DIR} # to find KokkosCore_config.h +) + +INCLUDE_DIRECTORIES(${Kokkos_INCLUDE_DIRS}) + +IF(KOKKOS_SEPARATE_LIBS) + # kokkoscore + ADD_LIBRARY( + kokkoscore + ${KOKKOS_CORE_SRCS} + ) + + target_compile_options( + kokkoscore + PUBLIC ${KOKKOS_CXX_FLAGS} + ) + + target_compile_features( + kokkoscore + PUBLIC ${KOKKOS_CXX11_FEATURES} + ) + + # Install the kokkoscore library + INSTALL (TARGETS kokkoscore + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin + ) + + # Install the kokkoscore headers + INSTALL (DIRECTORY + ${Kokkos_SOURCE_DIR}/core/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" + ) + + # Install KokkosCore_config.h header + INSTALL (FILES + 
${Kokkos_BINARY_DIR}/KokkosCore_config.h + DESTINATION ${KOKKOS_HEADER_DIR} + ) + + TARGET_LINK_LIBRARIES( + kokkoscore + ${KOKKOS_LD_FLAGS} + ${KOKKOS_LIBS} + ) + + # kokkoscontainers + ADD_LIBRARY( + kokkoscontainers + ${KOKKOS_CONTAINERS_SRCS} + ) + + TARGET_LINK_LIBRARIES( + kokkoscontainers + kokkoscore + ) + + # Install the kokkoscontainers library + INSTALL (TARGETS kokkoscontainers + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) + + # Install the kokkoscontainers headers + INSTALL (DIRECTORY + ${Kokkos_SOURCE_DIR}/containers/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" + ) + + # kokkosalgorithms - Build as interface library since no source files. + ADD_LIBRARY( + kokkosalgorithms + INTERFACE + ) + + target_include_directories( + kokkosalgorithms + INTERFACE ${Kokkos_SOURCE_DIR}/algorithms/src + ) + + TARGET_LINK_LIBRARIES( + kokkosalgorithms + INTERFACE kokkoscore + ) + + # Install the kokkosalgorithms library + INSTALL (TARGETS kokkosalgorithms + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + LIBRARY DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) + + # Install the kokkosalgorithms headers + INSTALL (DIRECTORY + ${Kokkos_SOURCE_DIR}/algorithms/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" + ) + + SET (Kokkos_LIBRARIES_NAMES kokkoscore kokkoscontainers kokkosalgorithms) + +ELSE() + # kokkos + ADD_LIBRARY( + kokkos + ${KOKKOS_CORE_SRCS} + ${KOKKOS_CONTAINERS_SRCS} + ) + + target_compile_options( + kokkos + PUBLIC ${KOKKOS_CXX_FLAGS} + ) + + target_compile_features( + kokkos + PUBLIC ${KOKKOS_CXX11_FEATURES} + ) + + TARGET_LINK_LIBRARIES( + kokkos + ${KOKKOS_LD_FLAGS} + ${KOKKOS_LIBS} + ) + + # Install the kokkos library + INSTALL (TARGETS kokkos + EXPORT KokkosTargets + ARCHIVE DESTINATION ${CMAKE_INSTALL_PREFIX}/lib + LIBRARY DESTINATION 
${CMAKE_INSTALL_PREFIX}/lib + RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin) + + + # Install the kokkos headers + INSTALL (DIRECTORY + EXPORT KokkosTargets + ${Kokkos_SOURCE_DIR}/core/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" + ) + INSTALL (DIRECTORY + EXPORT KokkosTargets + ${Kokkos_SOURCE_DIR}/containers/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" + ) + INSTALL (DIRECTORY + EXPORT KokkosTargets + ${Kokkos_SOURCE_DIR}/algorithms/src/ + DESTINATION ${KOKKOS_HEADER_DIR} + FILES_MATCHING PATTERN "*.hpp" + ) + + INSTALL (FILES + ${Kokkos_BINARY_DIR}/KokkosCore_config.h + DESTINATION ${KOKKOS_HEADER_DIR} + ) + + include_directories(${Kokkos_BINARY_DIR}) + include_directories(${Kokkos_SOURCE_DIR}/core/src) + include_directories(${Kokkos_SOURCE_DIR}/containers/src) + include_directories(${Kokkos_SOURCE_DIR}/algorithms/src) + + + SET (Kokkos_LIBRARIES_NAMES kokkos) + +endif() + +# Add all targets to the build-tree export set +export(TARGETS ${Kokkos_LIBRARIES_NAMES} + FILE "${Kokkos_BINARY_DIR}/KokkosTargets.cmake") + +# Export the package for use from the build-tree +# (this registers the build-tree with a global CMake-registry) +export(PACKAGE Kokkos) + +# Create the KokkosConfig.cmake and KokkosConfigVersion files +file(RELATIVE_PATH REL_INCLUDE_DIR "${INSTALL_CMAKE_DIR}" + "${INSTALL_INCLUDE_DIR}") +# ... for the build tree +set(CONF_INCLUDE_DIRS "${Kokkos_SOURCE_DIR}" "${Kokkos_BINARY_DIR}") +configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in + "${Kokkos_BINARY_DIR}/KokkosConfig.cmake" @ONLY) +# ... 
for the install tree +set(CONF_INCLUDE_DIRS "\${Kokkos_CMAKE_DIR}/${REL_INCLUDE_DIR}") +configure_file(${Kokkos_SOURCE_DIR}/cmake/KokkosConfig.cmake.in + "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake" @ONLY) + +# Install the KokkosConfig.cmake and KokkosConfigVersion.cmake +install(FILES + "${Kokkos_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/KokkosConfig.cmake" + DESTINATION "${INSTALL_CMAKE_DIR}") + +# Install the export set for use with the install-tree +INSTALL(EXPORT KokkosTargets DESTINATION + "${INSTALL_CMAKE_DIR}") diff --git a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt index 961e4186ec..0f24487814 100644 --- a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt +++ b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt @@ -60,34 +60,12 @@ Step 2: // -------------------------------------------------------------------------------- // Step 3: - 3.1. Build and test Trilinos with 3 different configurations; a configure-all script is provided in Trilinos and should be modified to test each of the following 3 configurations with appropriate environment variable(s): + 3.1. Build and test Trilinos with 4 different configurations; Run scripts for white and shepard are provided in kokkos/config/trilinos-integration - - GCC/4.7.2-OpenMP/Complex - Run tests with the following environment variable: + Usually it's a good idea to run those scripts via nohup. + You can run all four at the same time; use a separate directory for each. 
- export OMP_NUM_THREADS=2 - - - - Intel/15.0.2-Serial/NoComplex - - - - GCC/4.8.4/CUDA/7.5.18-Cuda/Serial/NoComplex - Run tests with the following environment variables: - - export CUDA_LAUNCH_BLOCKING=1 - export CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 - - - mkdir Build - cd Build - cp TRILINOS_PATH/sampleScripts/Sandia-SEMS/configure-all ./ - ** Set the path to Trilinos appropriately within the configure-all script ** - source $SEMS_MODULE_ROOT/utils/sems-modules-init.sh kokkos - source configure-all - make -k (-k means "keep going" to get past build errors; -j12 can also be specified to build with 12 threads, for example) - ctest - - 3.2. Compare the failed test output to the test output on the dashboard ( testing.sandia.gov/cdash select Trilinos ); investigate and fix problems if new tests fail after the Kokkos snapshot + 3.2. Compare the failed test output between the pristine and the updated runs; investigate and fix problems if new tests fail after the Kokkos snapshot // -------------------------------------------------------------------------------- // @@ -134,7 +112,7 @@ Step 4: Once all Trilinos tests pass promote Kokkos develop branch to master on master: sha1 develop: sha1 - git push --follow-tags origin master + 4.4. Do NOT push yet // -------------------------------------------------------------------------------- // @@ -156,9 +134,15 @@ Step 5: python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages - 5.3. Push the updated develop branch of Trilinos to Github - congratulations!!! + 5.3. Run checkin-test to push to trilinos using the CI build modules (gcc/4.9.3) - (From Trilinos directory): - git push + The modules are listed in kokkos/config/trilinos-integration/checkin-test + Run checkin-test, forward dependencies and optional dependencies must be enabled + If push failed because someone else clearly broke something, push manually. 
+ If push failed for unclear reasons, investigate, fix, and potentially start over from step 2 after resetting your local kokkos/master branch + +Step 6: Push Kokkos to master + + git push --follow-tags origin master // -------------------------------------------------------------------------------- // diff --git a/lib/kokkos/config/kokkos_dev/config-core-all.sh b/lib/kokkos/config/kokkos_dev/config-core-all.sh index d4fb25a8e1..1867de7204 100755 --- a/lib/kokkos/config/kokkos_dev/config-core-all.sh +++ b/lib/kokkos/config/kokkos_dev/config-core-all.sh @@ -13,7 +13,7 @@ # module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu # # The 'nvcc-wrapper' module should load a script that matches -# kokkos/config/nvcc_wrapper +# kokkos/bin/nvcc_wrapper # #----------------------------------------------------------------------------- # Source and installation directories: diff --git a/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh b/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh index c2e17bb944..5a6cc1493e 100755 --- a/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh +++ b/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh @@ -13,7 +13,7 @@ # module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu # # The 'nvcc-wrapper' module should load a script that matches -# kokkos/config/nvcc_wrapper +# kokkos/bin/nvcc_wrapper # #----------------------------------------------------------------------------- # Source and installation directories: diff --git a/lib/kokkos/config/kokkos_dev/config-core-cuda.sh b/lib/kokkos/config/kokkos_dev/config-core-cuda.sh index 39b72d5ce1..606755da81 100755 --- a/lib/kokkos/config/kokkos_dev/config-core-cuda.sh +++ b/lib/kokkos/config/kokkos_dev/config-core-cuda.sh @@ -13,7 +13,7 @@ # module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu # # The 'nvcc-wrapper' module should load a script that matches -# kokkos/config/nvcc_wrapper +# kokkos/bin/nvcc_wrapper # 
#----------------------------------------------------------------------------- # Source and installation directories: diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt index 9eaecb5031..cc6f4c97d7 100644 --- a/lib/kokkos/config/master_history.txt +++ b/lib/kokkos/config/master_history.txt @@ -5,4 +5,5 @@ tag: 2.02.00 date: 10:30:2016 master: 6c90a581 develop: ca3dd56e tag: 2.02.01 date: 11:01:2016 master: 9c698c86 develop: b0072304 tag: 2.02.07 date: 12:16:2016 master: 4b4cc4ba develop: 382c0966 tag: 2.02.15 date: 02:10:2017 master: 8c64cd93 develop: 28dea8b6 -tag: 2.03.00 date: 04:25:2017 master: 120d9ce7 develop: 015ba641 +tag: 2.03.00 date: 04:25:2017 master: 120d9ce7 develop: 015ba641 +tag: 2.03.05 date: 05:27:2017 master: 36b92f43 develop: 79073186 diff --git a/lib/kokkos/config/snapshot.py b/lib/kokkos/config/snapshot.py index d816cd0c9c..bfa97bf48a 100755 --- a/lib/kokkos/config/snapshot.py +++ b/lib/kokkos/config/snapshot.py @@ -27,7 +27,7 @@ import subprocess, argparse, re, doctest, os, datetime, traceback def parse_cmdline(description): parser = argparse.ArgumentParser(usage="snapshot.py [options] source destination", description=description) - parser.add_argument("-n", "--no-comit", action="store_false", dest="create_commit", default=True, + parser.add_argument("-n", "--no-commit", action="store_false", dest="create_commit", default=True, help="Do not perform a commit or create a commit message.") parser.add_argument("-v", "--verbose", action="store_true", dest="verbose_mode", default=False, help="Enable verbose mode.") @@ -39,6 +39,8 @@ def parse_cmdline(description): help="Type of repository of the source, use none to skip all repository operations.") parser.add_argument("--dest-repo", choices=["git","none"], default="", help="Type of repository of the destination, use none to skip all repository operations.") + parser.add_argument("--small", action="store_true", dest="small_mode", + help="Don't include tests and 
other extra files when copying.") parser.add_argument("source", help="Source project to snapshot from.") parser.add_argument("destination", help="Destination to snapshot too.") @@ -58,9 +60,9 @@ def validate_options(options): options.source = os.path.abspath(options.source) options.destination = os.path.abspath(options.destination) - + if os.path.exists(options.source): - apparent_source_repo_type, source_root = deterimine_repo_type(options.source) + apparent_source_repo_type, source_root = determine_repo_type(options.source) else: raise RuntimeError("Could not find source directory of %s." % options.source) options.source_root = source_root @@ -69,7 +71,7 @@ def validate_options(options): print "Could not find destination directory of %s so it will be created." % options.destination os.makedirs(options.destination) - apparent_dest_repo_type, dest_root = deterimine_repo_type(options.destination) + apparent_dest_repo_type, dest_root = determine_repo_type(options.destination) options.dest_root = dest_root #error on svn repo types for now @@ -111,7 +113,7 @@ def run_cmd(cmd, options, working_dir="."): print "==== %s stderr ====" % cmd_str print proc_stderr print "==== %s stderr ====" % cmd_str - + if ret_val != 0: raise RuntimeError("Command '%s' failed with error code %d. 
Error message:%s%s%sstdout:%s" % \ (cmd_str, ret_val, os.linesep, proc_stderr, os.linesep, proc_stdout)) @@ -119,7 +121,7 @@ def run_cmd(cmd, options, working_dir="."): return proc_stdout, proc_stderr #end run_cmd -def deterimine_repo_type(location): +def determine_repo_type(location): apparent_repo_type = "none" while location != "": @@ -133,16 +135,32 @@ def deterimine_repo_type(location): location = location[:location.rfind(os.sep)] return apparent_repo_type, location - -#end deterimine_repo_type +#end determine_repo_type def rsync(source, dest, options): rsync_cmd = ["rsync", "-ar", "--delete"] if options.debug_mode: rsync_cmd.append("-v") + if options.small_mode or options.source_repo == "git": + rsync_cmd.append("--delete-excluded") + + if options.small_mode: + rsync_cmd.append("--include=config/master_history.txt") + rsync_cmd.append("--include=cmake/tpls") + rsync_cmd.append("--exclude=benchmarks/") + rsync_cmd.append("--exclude=config/*") + rsync_cmd.append("--exclude=doc/") + rsync_cmd.append("--exclude=example/") + rsync_cmd.append("--exclude=tpls/") + rsync_cmd.append("--exclude=HOW_TO_SNAPSHOT") + rsync_cmd.append("--exclude=unit_test") + rsync_cmd.append("--exclude=unit_tests") + rsync_cmd.append("--exclude=perf_test") + rsync_cmd.append("--exclude=performance_tests") + if options.source_repo == "git": - rsync_cmd.append("--exclude=.git") + rsync_cmd.append("--exclude=.git*") rsync_cmd.append(options.source) rsync_cmd.append(options.destination) @@ -171,28 +189,27 @@ def find_git_commit_information(options): ('sems', 'software.sandia.gov:/git/sems') """ git_log_cmd = ["git", "log", "-1"] - + output, error = run_cmd(git_log_cmd, options, options.source) - + commit_match = re.match("commit ([0-9a-fA-F]+)", output) commit_id = commit_match.group(1) commit_log = output - + git_remote_cmd = ["git", "remote", "-v"] output, error = run_cmd(git_remote_cmd, options, options.source) - + remote_match = re.search("origin\s([^ ]*/([^ ]+))", output, re.MULTILINE) 
if not remote_match: raise RuntimeError("Could not find origin of repo at %s. Consider using none for source repo type." % (options.source)) source_location = remote_match.group(1) source_name = remote_match.group(2).strip() - + if source_name[-1] == "/": source_name = source_name[:-1] return commit_id, commit_log, source_name, source_location - #end find_git_commit_information def do_git_commit(message, options): @@ -201,10 +218,10 @@ def do_git_commit(message, options): git_add_cmd = ["git", "add", "-A"] run_cmd(git_add_cmd, options, options.destination) - + git_commit_cmd = ["git", "commit", "-m%s" % message] run_cmd(git_commit_cmd, options, options.destination) - + git_log_cmd = ["git", "log", "--format=%h", "-1"] commit_sha1, error = run_cmd(git_log_cmd, options, options.destination) @@ -214,7 +231,7 @@ def do_git_commit(message, options): def verify_git_repo_clean(location, options): git_status_cmd = ["git", "status", "--porcelain"] output, error = run_cmd(git_status_cmd, options, location) - + if output != "": if options.no_validate_repo == False: raise RuntimeError("%s is not clean.%sPlease commit or stash all changes before running snapshot." @@ -223,7 +240,6 @@ def verify_git_repo_clean(location, options): print "WARNING: %s is not clean. Proceeding anyway." % location print "WARNING: This could lead to differences in the source and destination." print "WARNING: It could also lead to extra files being included in the snapshot commit." 
- #end verify_git_repo_clean def main(options): @@ -238,14 +254,14 @@ def main(options): commit_log = "Unknown commit from %s snapshotted at: %s" % (options.source, datetime.datetime.now()) repo_name = options.source repo_location = options.source - + commit_message = create_commit_message(commit_id, commit_log, repo_name, repo_location) + os.linesep*2 - + if options.dest_repo == "git": verify_git_repo_clean(options.destination, options) rsync(options.source, options.destination, options) - + if options.dest_repo == "git": do_git_commit(commit_message, options) elif options.dest_repo == "none": @@ -256,10 +272,6 @@ def main(options): cwd = os.getcwd() print "No commit done by request. Please use file at:" print "%s%sif you wish to commit this to a repo later." % (cwd+"/"+file_name, os.linesep) - - - - #end main if (__name__ == "__main__"): @@ -267,7 +279,7 @@ if (__name__ == "__main__"): doctest.testmod() sys.exit(0) - try: + try: options = parse_cmdline(__doc__) main(options) except RuntimeError, e: @@ -275,5 +287,5 @@ if (__name__ == "__main__"): if "--debug" in sys.argv: traceback.print_exc() sys.exit(1) - else: + else: sys.exit(0) diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia index 6909606643..8e1246bf8b 100755 --- a/lib/kokkos/config/test_all_sandia +++ b/lib/kokkos/config/test_all_sandia @@ -24,6 +24,8 @@ elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name fi elif [[ "$HOSTNAME" =~ apollo ]]; then MACHINE=apollo +elif [[ "$HOSTNAME" =~ sullivan ]]; then + MACHINE=sullivan elif [ ! 
-z "$SEMS_MODULEFILES_ROOT" ]; then MACHINE=sems else @@ -152,7 +154,7 @@ if [ "$MACHINE" = "sems" ]; then "gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS" "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" - "cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) else # Format: (compiler module-list build-list exe-name warning-flag) @@ -164,6 +166,7 @@ if [ "$MACHINE" = "sems" ]; then "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" "cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" @@ -184,7 +187,7 @@ elif [ "$MACHINE" = "white" ]; then # Format: (compiler module-list build-list exe-name warning-flag) COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" - "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) if [ -z "$ARCH_FLAG" ]; then @@ -221,7 +224,7 @@ elif [ "$MACHINE" = "sullivan" ]; then BASE_MODULE_LIST="/" # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("gcc/5.3.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ 
$GCC_WARNING_FLAGS") + COMPILERS=("gcc/6.1.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS") if [ -z "$ARCH_FLAG" ]; then ARCH_FLAG="--arch=ARMv8-ThunderX" @@ -278,11 +281,11 @@ elif [ "$MACHINE" = "apollo" ]; then "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS" "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS" "clang/head $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS" - "cuda/8.0.44 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/8.0.44 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) else # Format: (compiler module-list build-list exe-name warning-flag) - COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" "clang/head $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS" "clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS" "gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" @@ -295,8 +298,8 @@ elif [ "$MACHINE" = "apollo" ]; then "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" - "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" - "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS" ) fi diff --git a/lib/kokkos/config/testing_scripts/jenkins_test_driver 
b/lib/kokkos/config/testing_scripts/jenkins_test_driver index 9cba7fa518..f393940304 100755 --- a/lib/kokkos/config/testing_scripts/jenkins_test_driver +++ b/lib/kokkos/config/testing_scripts/jenkins_test_driver @@ -48,7 +48,7 @@ esac #nvcc wrapper and make the wrapper the compiler. if [ $cuda_compiler != "" ]; then export NVCC_WRAPPER_DEFAULT_COMPILER=$compiler - compiler=$kokkos_path/config/nvcc_wrapper + compiler=$kokkos_path/bin/nvcc_wrapper fi if [ $host_compiler_brand == "intel" -a $cuda_compiler != "" ]; then diff --git a/lib/kokkos/config/trilinos-integration/checkin-test b/lib/kokkos/config/trilinos-integration/checkin-test new file mode 100644 index 0000000000..92a1b1c068 --- /dev/null +++ b/lib/kokkos/config/trilinos-integration/checkin-test @@ -0,0 +1,4 @@ +module purge +module load sems-env sems-gcc/4.9.3 sems-openmpi/1.10.1 sems-hdf5/1.8.12/parallel sems-netcdf/4.3.2/parallel sems-python/2.7.9 sems-zlib/1.2.8/base sems-cmake/3.5.2 sems-parmetis/4.0.3/64bit_parallel sems-scotch/6.0.3/nopthread_64bit_parallel sems-boost/1.59.0/base + +#Run Trilinos CheckinTest diff --git a/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh b/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh index 2692f76038..b81a3b1566 100755 --- a/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh +++ b/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh @@ -1,5 +1,18 @@ #!/bin/bash -le +TRILINOS_UPDATE_BRANCH=$1 +TRILINOS_PRISTINE_BRANCH=$2 + +if [ -z $TRILINOS_UPDATE_BRANCH ] +then + TRILINOS_UPDATE_BRANCH=develop +fi + +if [ -z $TRILINOS_PRISTINE_BRANCH ] +then + TRILINOS_PRISTINE_BRANCH=develop +fi + export TRILINOS_UPDATED_PATH=${PWD}/trilinos-update export TRILINOS_PRISTINE_PATH=${PWD}/trilinos-pristine @@ -16,8 +29,8 @@ if [ ! 
-d "${TRILINOS_PRISTINE_PATH}" ]; then fi cd ${TRILINOS_UPDATED_PATH} -git checkout develop -git reset --hard origin/develop +git checkout $TRILINOS_UPDATE_BRANCH +git reset --hard origin/$TRILINOS_UPDATE_BRANCH git pull cd .. @@ -28,18 +41,14 @@ echo "" echo "" echo "Trilinos State:" git log --pretty=oneline --since=7.days -SHA=`git log --pretty=oneline --since=7.days | head -n 2 | tail -n 1 | awk '{print $1}'` cd .. cd ${TRILINOS_PRISTINE_PATH} git status -git log --pretty=oneline --since=7.days -echo "Checkout develop" -git checkout develop +echo "Checkout $TRILINOS_PRISTINE_BRANCH" +git checkout $TRILINOS_PRISTINE_BRANCH echo "Pull" git pull -echo "Checkout SHA" -git checkout ${SHA} cd .. cd ${TRILINOS_PRISTINE_PATH} diff --git a/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_pthread_intel b/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_pthread_intel new file mode 100755 index 0000000000..23968e8c0f --- /dev/null +++ b/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_pthread_intel @@ -0,0 +1,60 @@ +#!/bin/bash -el +ulimit -c 0 +module load devpack/openmpi/1.10.0/intel/16.1.056/cuda/none + +KOKKOS_BRANCH=$1 +TRILINOS_UPDATE_BRANCH=$2 +TRILINOS_PRISTINE_BRANCH=$3 + +if [ -z $KOKKOS_BRANCH ] +then + KOKKOS_BRANCH=develop +fi + +if [ -z $TRILINOS_UPDATE_BRANCH ] +then + TRILINOS_UPDATE_BRANCH=develop +fi + +if [ -z $TRILINOS_PRISTINE_BRANCH ] +then + TRILINOS_PRISTINE_BRANCH=develop +fi + +export OMP_NUM_THREADS=8 +export JENKINS_DO_CUDA=OFF +export JENKINS_DO_OPENMP=OFF +export JENKINS_DO_PTHREAD=ON +export JENKINS_DO_SERIAL=OFF +export JENKINS_DO_COMPLEX=OFF + +export ARCH_CXX_FLAG="-xCORE-AVX2 -mkl" +export ARCH_C_FLAG="-xCORE-AVX2 -mkl" +export BLAS_LIBRARIES="-mkl;${MKLROOT}/lib/intel64/libmkl_intel_lp64.a;${MKLROOT}/lib/intel64/libmkl_intel_thread.a;${MKLROOT}/lib/intel64/libmkl_core.a" +export LAPACK_LIBRARIES=${BLAS_LIBRARIES} + +export JENKINS_DO_TESTS=ON +export JENKINS_DO_EXAMPLES=ON +export 
JENKINS_DO_SHARED=OFF + +export QUEUE=haswell + + +module load python + + +export KOKKOS_PATH=${PWD}/kokkos + +#Already done: +if [ ! -d "${KOKKOS_PATH}" ]; then + git clone https://github.com/kokkos/kokkos ${KOKKOS_PATH} +fi + +cd ${KOKKOS_PATH} +git checkout $KOKKOS_BRANCH +git pull +cd .. + +source ${KOKKOS_PATH}/config/trilinos-integration/prepare_trilinos_repos.sh $TRILINOS_UPDATE_BRANCH $TRILINOS_PRISTINE_BRANCH + +${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/run_repo_comparison_slurm ${TRILINOS_UPDATED_PATH} ${TRILINOS_PRISTINE_PATH} ${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/configure-testbeds-jenkins-all TestCompare ${QUEUE} diff --git a/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_serial_intel b/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_serial_intel new file mode 100755 index 0000000000..964de3a002 --- /dev/null +++ b/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_serial_intel @@ -0,0 +1,60 @@ +#!/bin/bash -el +ulimit -c 0 +module load devpack/openmpi/1.10.0/intel/16.1.056/cuda/none + +KOKKOS_BRANCH=$1 +TRILINOS_UPDATE_BRANCH=$2 +TRILINOS_PRISTINE_BRANCH=$3 + +if [ -z $KOKKOS_BRANCH ] +then + KOKKOS_BRANCH=develop +fi + +if [ -z $TRILINOS_UPDATE_BRANCH ] +then + TRILINOS_UPDATE_BRANCH=develop +fi + +if [ -z $TRILINOS_PRISTINE_BRANCH ] +then + TRILINOS_PRISTINE_BRANCH=develop +fi + +export OMP_NUM_THREADS=8 +export JENKINS_DO_CUDA=OFF +export JENKINS_DO_OPENMP=OFF +export JENKINS_DO_PTHREAD=OFF +export JENKINS_DO_SERIAL=ON +export JENKINS_DO_COMPLEX=ON + +export ARCH_CXX_FLAG="-xCORE-AVX2 -mkl" +export ARCH_C_FLAG="-xCORE-AVX2 -mkl" +export BLAS_LIBRARIES="-mkl;${MKLROOT}/lib/intel64/libmkl_intel_lp64.a;${MKLROOT}/lib/intel64/libmkl_intel_thread.a;${MKLROOT}/lib/intel64/libmkl_core.a" +export LAPACK_LIBRARIES=${BLAS_LIBRARIES} + +export JENKINS_DO_TESTS=ON +export JENKINS_DO_EXAMPLES=ON +export JENKINS_DO_SHARED=OFF + +export QUEUE=haswell + + +module load python + + +export 
KOKKOS_PATH=${PWD}/kokkos + +#Already done: +if [ ! -d "${KOKKOS_PATH}" ]; then + git clone https://github.com/kokkos/kokkos ${KOKKOS_PATH} +fi + +cd ${KOKKOS_PATH} +git checkout $KOKKOS_BRANCH +git pull +cd .. + +source ${KOKKOS_PATH}/config/trilinos-integration/prepare_trilinos_repos.sh $TRILINOS_UPDATE_BRANCH $TRILINOS_PRISTINE_BRANCH + +${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/run_repo_comparison_slurm ${TRILINOS_UPDATED_PATH} ${TRILINOS_PRISTINE_PATH} ${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/configure-testbeds-jenkins-all TestCompare ${QUEUE} diff --git a/lib/kokkos/config/trilinos-integration/white_run_jenkins_script_cuda b/lib/kokkos/config/trilinos-integration/white_run_jenkins_script_cuda new file mode 100755 index 0000000000..52af024858 --- /dev/null +++ b/lib/kokkos/config/trilinos-integration/white_run_jenkins_script_cuda @@ -0,0 +1,63 @@ +#!/bin/bash -el +ulimit -c 0 + +KOKKOS_BRANCH=$1 +TRILINOS_UPDATE_BRANCH=$2 +TRILINOS_PRISTINE_BRANCH=$3 + +if [ -z $KOKKOS_BRANCH ] +then + KOKKOS_BRANCH=develop +fi + +if [ -z $TRILINOS_UPDATE_BRANCH ] +then + TRILINOS_UPDATE_BRANCH=develop +fi + +if [ -z $TRILINOS_PRISTINE_BRANCH ] +then + TRILINOS_PRISTINE_BRANCH=develop +fi + +module load devpack/openmpi/1.10.4/gcc/5.4.0/cuda/8.0.44 +export OMP_NUM_THREADS=8 +export JENKINS_DO_CUDA=ON +export JENKINS_DO_OPENMP=OFF +export JENKINS_DO_PTHREAD=OFF +export JENKINS_DO_SERIAL=ON +export JENKINS_DO_COMPLEX=OFF + +export JENKINS_ARCH_CXX_FLAG="-mcpu=power8 -arch=sm_37" +export JENKINS_ARCH_C_FLAG="-mcpu=power8" +export BLAS_LIBRARIES="${BLAS_ROOT}/lib/libblas.a;gfortran;gomp" +export LAPACK_LIBRARIES="${LAPACK_ROOT}/lib/liblapack.a;gfortran;gomp" + +export JENKINS_DO_TESTS=ON +export JENKINS_DO_EXAMPLES=ON + +export QUEUE=rhel7F + +module load python + +export KOKKOS_PATH=${PWD}/kokkos + +#Already done: +if [ ! 
-d "${KOKKOS_PATH}" ]; then + git clone https://github.com/kokkos/kokkos ${KOKKOS_PATH} +fi + +export OMPI_CXX=${KOKKOS_PATH}/bin/nvcc_wrapper + +cd ${KOKKOS_PATH} +git checkout $KOKKOS_BRANCH +git pull +cd .. + +export CUDA_LAUNCH_BLOCKING=1 +export CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 + +source ${KOKKOS_PATH}/config/trilinos-integration/prepare_trilinos_repos.sh $TRILINOS_UPDATE_BRANCH $TRILINOS_PRISTINE_BRANCH + +${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/run_repo_comparison_lsf ${TRILINOS_UPDATED_PATH} ${TRILINOS_PRISTINE_PATH} ${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/configure-testbeds-jenkins-all TestCompare ${QUEUE} + diff --git a/lib/kokkos/config/trilinos-integration/white_run_jenkins_script_omp b/lib/kokkos/config/trilinos-integration/white_run_jenkins_script_omp new file mode 100755 index 0000000000..452165eef2 --- /dev/null +++ b/lib/kokkos/config/trilinos-integration/white_run_jenkins_script_omp @@ -0,0 +1,58 @@ +#!/bin/bash -el +ulimit -c 0 + +KOKKOS_BRANCH=$1 +TRILINOS_UPDATE_BRANCH=$2 +TRILINOS_PRISTINE_BRANCH=$3 + +if [ -z $KOKKOS_BRANCH ] +then + KOKKOS_BRANCH=develop +fi + +if [ -z $TRILINOS_UPDATE_BRANCH ] +then + TRILINOS_UPDATE_BRANCH=develop +fi + +if [ -z $TRILINOS_PRISTINE_BRANCH ] +then + TRILINOS_PRISTINE_BRANCH=develop +fi + +module load devpack/openmpi/1.10.4/gcc/5.4.0/cuda/8.0.44 +export OMP_NUM_THREADS=8 +export JENKINS_DO_CUDA=OFF +export JENKINS_DO_OPENMP=ON +export JENKINS_DO_PTHREAD=OFF +export JENKINS_DO_SERIAL=OFF +export JENKINS_DO_COMPLEX=OFF + +export JENKINS_ARCH_CXX_FLAG="-mcpu=power8" +export JENKINS_ARCH_C_FLAG="-mcpu=power8" +export BLAS_LIBRARIES="${BLAS_ROOT}/lib/libblas.a;gfortran;gomp" +export LAPACK_LIBRARIES="${LAPACK_ROOT}/lib/liblapack.a;gfortran;gomp" + +export JENKINS_DO_TESTS=ON +export JENKINS_DO_EXAMPLES=ON + +export QUEUE=rhel7F + +module load python + +export KOKKOS_PATH=${PWD}/kokkos + +#Already done: +if [ ! 
-d "${KOKKOS_PATH}" ]; then + git clone https://github.com/kokkos/kokkos ${KOKKOS_PATH} +fi + +cd ${KOKKOS_PATH} +git checkout $KOKKOS_BRANCH +git pull +cd .. + +source ${KOKKOS_PATH}/config/trilinos-integration/prepare_trilinos_repos.sh $TRILINOS_UPDATE_BRANCH $TRILINOS_PRISTINE_BRANCH + +${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/run_repo_comparison_lsf ${TRILINOS_UPDATED_PATH} ${TRILINOS_PRISTINE_PATH} ${TRILINOS_UPDATED_PATH}/sampleScripts/Sandia-SEMS/configure-testbeds-jenkins-all TestCompare ${QUEUE} + diff --git a/lib/kokkos/containers/performance_tests/Makefile b/lib/kokkos/containers/performance_tests/Makefile index fa3bc77701..edaaf1ee51 100644 --- a/lib/kokkos/containers/performance_tests/Makefile +++ b/lib/kokkos/containers/performance_tests/Makefile @@ -8,7 +8,7 @@ default: build_all echo "End Build" ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) - CXX = $(KOKKOS_PATH)/config/nvcc_wrapper + CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper else CXX = g++ endif @@ -21,8 +21,8 @@ include $(KOKKOS_PATH)/Makefile.kokkos KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests -TEST_TARGETS = -TARGETS = +TEST_TARGETS = +TARGETS = ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) OBJ_CUDA = TestCuda.o TestMain.o gtest-all.o @@ -65,7 +65,7 @@ build_all: $(TARGETS) test: $(TEST_TARGETS) -clean: kokkos-clean +clean: kokkos-clean rm -f *.o $(TARGETS) # Compilation rules @@ -73,6 +73,5 @@ clean: kokkos-clean %.o:%.cpp $(KOKKOS_CPP_DEPENDS) $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc - diff --git a/lib/kokkos/containers/performance_tests/TestCuda.cpp b/lib/kokkos/containers/performance_tests/TestCuda.cpp index d5cad06a47..208387425f 100644 --- a/lib/kokkos/containers/performance_tests/TestCuda.cpp +++ 
b/lib/kokkos/containers/performance_tests/TestCuda.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,12 +36,15 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ -#include +#include +#if defined( KOKKOS_ENABLE_CUDA ) + +#include #include #include #include @@ -52,8 +55,6 @@ #include -#if defined( KOKKOS_ENABLE_CUDA ) - #include #include @@ -79,7 +80,7 @@ protected: } }; -TEST_F( cuda, dynrankview_perf ) +TEST_F( cuda, dynrankview_perf ) { std::cout << "Cuda" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; @@ -105,5 +106,6 @@ TEST_F( cuda, unordered_map_performance_far) } } - +#else +void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTCUDA_PREVENT_EMPTY_LINK_ERROR() {} #endif /* #if defined( KOKKOS_ENABLE_CUDA ) */ diff --git a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp index d96a3f7432..4c0ccb6b88 100644 --- a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp +++ b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp @@ -1,13 +1,13 @@ //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. 
-// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER @@ -263,3 +263,4 @@ void test_dynrankview_op_perf( const int par_size ) } //end Performance #endif + diff --git a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp index da74d32ac1..b674ec4a74 100644 --- a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp +++ b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,11 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include +#if defined( KOKKOS_ENABLE_OPENMP ) + #include #include @@ -93,7 +96,7 @@ protected: } }; -TEST_F( openmp, dynrankview_perf ) +TEST_F( openmp, dynrankview_perf ) { std::cout << "OpenMP" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; @@ -137,4 +140,7 @@ TEST_F( openmp, unordered_map_performance_far) } } // namespace test +#else +void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTOPENMP_PREVENT_EMPTY_LINK_ERROR() {} +#endif diff --git a/lib/kokkos/containers/performance_tests/TestThreads.cpp b/lib/kokkos/containers/performance_tests/TestThreads.cpp index 4179b7de4c..a8910a3c72 100644 --- a/lib/kokkos/containers/performance_tests/TestThreads.cpp +++ b/lib/kokkos/containers/performance_tests/TestThreads.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,11 +36,14 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ +#include +#if defined( KOKKOS_ENABLE_THREADS ) + #include #include @@ -87,7 +90,7 @@ protected: } }; -TEST_F( threads, dynrankview_perf ) +TEST_F( threads, dynrankview_perf ) { std::cout << "Threads" << std::endl; std::cout << " DynRankView vs View: Initialization Only " << std::endl; @@ -132,4 +135,7 @@ TEST_F( threads, unordered_map_performance_far) } // namespace Performance +#else +void KOKKOS_CONTAINERS_PERFORMANCE_TESTS_TESTTHREADS_PREVENT_EMPTY_LINK_ERROR() {} +#endif diff --git a/lib/kokkos/containers/src/Kokkos_Bitset.hpp b/lib/kokkos/containers/src/Kokkos_Bitset.hpp index 74da5f61b5..7714506e92 100644 --- a/lib/kokkos/containers/src/Kokkos_Bitset.hpp +++ b/lib/kokkos/containers/src/Kokkos_Bitset.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -435,3 +435,4 @@ void deep_copy( ConstBitset & dst, ConstBitset const& src) } // namespace Kokkos #endif //KOKKOS_BITSET_HPP + diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp index 3a0196ee4c..937eab0d88 100644 --- a/lib/kokkos/containers/src/Kokkos_DualView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp @@ -442,6 +442,17 @@ public: modified_host () = (modified_device () > modified_host () ? modified_device () : modified_host ()) + 1; } + +#ifdef KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK + if (modified_host() && modified_device()) { + std::string msg = "Kokkos::DualView::modify ERROR: "; + msg += "Concurrent modification of host and device views "; + msg += "in DualView \""; + msg += d_view.label(); + msg += "\"\n"; + Kokkos::abort(msg.c_str()); + } +#endif } //@} @@ -624,3 +635,4 @@ deep_copy (const ExecutionSpace& exec , } // namespace Kokkos #endif + diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp index acb37f7f75..8e464506f9 100644 --- a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp +++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -140,21 +140,21 @@ struct DynRankDimTraits { static typename std::enable_if< (std::is_same::value) , Layout>::type createLayout( const Layout& layout ) { return Layout( layout.dimension[0] != unspecified ? layout.dimension[0] : 1 - , layout.stride[0] + , layout.stride[0] , layout.dimension[1] != unspecified ? layout.dimension[1] : 1 - , layout.stride[1] + , layout.stride[1] , layout.dimension[2] != unspecified ? layout.dimension[2] : 1 - , layout.stride[2] + , layout.stride[2] , layout.dimension[3] != unspecified ? layout.dimension[3] : 1 - , layout.stride[3] + , layout.stride[3] , layout.dimension[4] != unspecified ? layout.dimension[4] : 1 - , layout.stride[4] + , layout.stride[4] , layout.dimension[5] != unspecified ? layout.dimension[5] : 1 - , layout.stride[5] + , layout.stride[5] , layout.dimension[6] != unspecified ? layout.dimension[6] : 1 - , layout.stride[6] + , layout.stride[6] , layout.dimension[7] != unspecified ? layout.dimension[7] : 1 - , layout.stride[7] + , layout.stride[7] ); } @@ -188,7 +188,7 @@ struct DynRankDimTraits { KOKKOS_INLINE_FUNCTION static typename std::enable_if< (std::is_same::value || std::is_same::value) && std::is_integral::value , Layout >::type reconstructLayout( const Layout& layout , iType dynrank ) { - return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0) + return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0) , dynrank > 1 ? layout.dimension[1] : ~size_t(0) , dynrank > 2 ? layout.dimension[2] : ~size_t(0) , dynrank > 3 ? layout.dimension[3] : ~size_t(0) @@ -205,27 +205,27 @@ struct DynRankDimTraits { static typename std::enable_if< (std::is_same::value) && std::is_integral::value , Layout >::type reconstructLayout( const Layout& layout , iType dynrank ) { return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0) - , dynrank > 0 ? layout.stride[0] : (0) + , dynrank > 0 ? 
layout.stride[0] : (0) , dynrank > 1 ? layout.dimension[1] : ~size_t(0) - , dynrank > 1 ? layout.stride[1] : (0) + , dynrank > 1 ? layout.stride[1] : (0) , dynrank > 2 ? layout.dimension[2] : ~size_t(0) - , dynrank > 2 ? layout.stride[2] : (0) + , dynrank > 2 ? layout.stride[2] : (0) , dynrank > 3 ? layout.dimension[3] : ~size_t(0) - , dynrank > 3 ? layout.stride[3] : (0) + , dynrank > 3 ? layout.stride[3] : (0) , dynrank > 4 ? layout.dimension[4] : ~size_t(0) - , dynrank > 4 ? layout.stride[4] : (0) + , dynrank > 4 ? layout.stride[4] : (0) , dynrank > 5 ? layout.dimension[5] : ~size_t(0) - , dynrank > 5 ? layout.stride[5] : (0) + , dynrank > 5 ? layout.stride[5] : (0) , dynrank > 6 ? layout.dimension[6] : ~size_t(0) - , dynrank > 6 ? layout.stride[6] : (0) + , dynrank > 6 ? layout.stride[6] : (0) , dynrank > 7 ? layout.dimension[7] : ~size_t(0) - , dynrank > 7 ? layout.stride[7] : (0) + , dynrank > 7 ? layout.stride[7] : (0) ); } /** \brief Debug bounds-checking routines */ -// Enhanced debug checking - most infrastructure matches that of functions in +// Enhanced debug checking - most infrastructure matches that of functions in // Kokkos_ViewMapping; additional checks for extra arguments beyond rank are 0 template< unsigned , typename iType0 , class MapType > KOKKOS_INLINE_FUNCTION @@ -235,20 +235,20 @@ bool dyn_rank_view_verify_operator_bounds( const iType0 & , const MapType & ) template< unsigned R , typename iType0 , class MapType , typename iType1 , class ... Args > KOKKOS_INLINE_FUNCTION bool dyn_rank_view_verify_operator_bounds - ( const iType0 & rank + ( const iType0 & rank , const MapType & map , const iType1 & i , Args ... args ) { - if ( static_cast(R) < rank ) { + if ( static_cast(R) < rank ) { return ( size_t(i) < map.extent(R) ) && dyn_rank_view_verify_operator_bounds( rank , map , args ... 
); } else if ( i != 0 ) { printf("DynRankView Debug Bounds Checking Error: at rank %u\n Extra arguments beyond the rank must be zero \n",R); return ( false ) - && dyn_rank_view_verify_operator_bounds( rank , map , args ... ); + && dyn_rank_view_verify_operator_bounds( rank , map , args ... ); } else { return ( true ) @@ -281,20 +281,24 @@ void dyn_rank_view_error_operator_bounds } // op_rank = rank of the operator version that was called -template< typename iType0 , typename iType1 , class MapType , class ... Args > +template< typename MemorySpace + , typename iType0 , typename iType1 , class MapType , class ... Args > KOKKOS_INLINE_FUNCTION void dyn_rank_view_verify_operator_bounds - ( const iType0 & op_rank , const iType1 & rank , const char* label , const MapType & map , Args ... args ) + ( const iType0 & op_rank , const iType1 & rank + , const Kokkos::Impl::SharedAllocationTracker & tracker + , const MapType & map , Args ... args ) { if ( static_cast(rank) > op_rank ) { - Kokkos::abort( "DynRankView Bounds Checking Error: Need at least rank arguments to the operator()" ); + Kokkos::abort( "DynRankView Bounds Checking Error: Need at least rank arguments to the operator()" ); } if ( ! dyn_rank_view_verify_operator_bounds<0>( rank , map , args ... ) ) { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) enum { LEN = 1024 }; char buffer[ LEN ]; - int n = snprintf(buffer,LEN,"DynRankView bounds error of view %s (", label); + const std::string label = tracker.template get_label(); + int n = snprintf(buffer,LEN,"DynRankView bounds error of view %s (", label.c_str()); dyn_rank_view_error_operator_bounds<0>( buffer + n , LEN - n , map , args ... 
); Kokkos::Impl::throw_runtime_exception(std::string(buffer)); #else @@ -347,7 +351,7 @@ private: std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value || std::is_same< typename DstTraits::array_layout - , Kokkos::LayoutStride >::value + , Kokkos::LayoutStride >::value }; public: @@ -381,9 +385,9 @@ public: } //end Impl /* \class DynRankView - * \brief Container that creates a Kokkos view with rank determined at runtime. + * \brief Container that creates a Kokkos view with rank determined at runtime. * Essentially this is a rank 7 view that wraps the access operators - * to yield the functionality of a view + * to yield the functionality of a view * * Changes from View * 1. The rank of the DynRankView is returned by the method rank() @@ -410,14 +414,14 @@ class DynRankView : public ViewTraits< DataType , Properties ... > { static_assert( !std::is_array::value && !std::is_pointer::value , "Cannot template DynRankView with array or pointer datatype - must be pod" ); -private: +private: template < class , class ... > friend class DynRankView ; template < class , class ... > friend class Impl::ViewMapping ; -public: +public: typedef ViewTraits< DataType , Properties ... > drvtraits ; - typedef View< DataType******* , Properties...> view_type ; + typedef View< DataType******* , Properties...> view_type ; typedef ViewTraits< DataType******* , Properties ... 
> traits ; @@ -430,7 +434,7 @@ private: map_type m_map ; unsigned m_rank; -public: +public: KOKKOS_INLINE_FUNCTION view_type & DownCast() const { return ( view_type & ) (*this); } KOKKOS_INLINE_FUNCTION @@ -588,7 +592,7 @@ private: // rank of the calling operator - included as first argument in ARG #define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \ DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \ - Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds ARG ; + Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds< typename traits::memory_space > ARG ; #else @@ -607,14 +611,10 @@ public: // Rank 0 KOKKOS_INLINE_FUNCTION reference_type operator()() const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , NULL , m_map) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank() , m_track.template get_label().c_str(),m_map) ) - #endif + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank(), m_track, m_map) ) return implementation_map().reference(); - //return m_map.reference(0,0,0,0,0,0,0); + //return m_map.reference(0,0,0,0,0,0,0); } // Rank 1 @@ -624,6 +624,8 @@ public: typename std::enable_if< std::is_same::value && std::is_integral::value, reference_type>::type operator[](const iType & i0) const { + //Phalanx is violating this, since they use the operator to access ALL elements in the allocation + //KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map) ) return data()[i0]; } @@ -647,14 +649,10 @@ public: template< typename iType > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType & i0 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template 
get_label().c_str(),m_map,i0) ) - #endif - return m_map.reference(i0); + operator()(const iType & i0 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) + return m_map.reference(i0); } template< typename iType > @@ -662,11 +660,7 @@ public: typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type operator()(const iType & i0 ) const { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , NULL , m_map , i0) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank() , m_track.template get_label().c_str(),m_map,i0) ) - #endif + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) ) return m_map.reference(i0,0,0,0,0,0,0); } @@ -674,155 +668,111 @@ public: template< typename iType0 , typename iType1 > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template get_label().c_str(),m_map,i0,i1) ) - #endif - return m_map.reference(i0,i1); + operator()(const iType0 & i0 , const iType1 & i1 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) + return m_map.reference(i0,i1); } template< typename iType0 , typename iType1 > KOKKOS_INLINE_FUNCTION typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , NULL , m_map , i0 , i1) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank() , m_track.template 
get_label().c_str(),m_map,i0,i1) ) - #endif - return m_map.reference(i0,i1,0,0,0,0,0); + operator()(const iType0 & i0 , const iType1 & i1 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) ) + return m_map.reference(i0,i1,0,0,0,0,0); } // Rank 3 template< typename iType0 , typename iType1 , typename iType2 > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label().c_str(),m_map,i0,i1,i2) ) - #endif - return m_map.reference(i0,i1,i2); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) + return m_map.reference(i0,i1,i2); } template< typename iType0 , typename iType1 , typename iType2 > KOKKOS_INLINE_FUNCTION typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , NULL , m_map , i0 , i1 , i2) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank() , m_track.template get_label().c_str(),m_map,i0,i1,i2) ) - #endif - return m_map.reference(i0,i1,i2,0,0,0,0); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) ) + return m_map.reference(i0,i1,i2,0,0,0,0); } // Rank 4 template< typename iType0 , typename iType1 , typename iType2 , typename iType3 
> KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label().c_str(),m_map,i0,i1,i2,i3) ) - #endif - return m_map.reference(i0,i1,i2,i3); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) + return m_map.reference(i0,i1,i2,i3); } template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > KOKKOS_INLINE_FUNCTION typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank() , m_track.template get_label().c_str(),m_map,i0,i1,i2,i3) ) - #endif - return m_map.reference(i0,i1,i2,i3,0,0,0); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) ) + return m_map.reference(i0,i1,i2,i3,0,0,0); } // Rank 5 template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && 
std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label().c_str(),m_map,i0,i1,i2,i3,i4) ) - #endif - return m_map.reference(i0,i1,i2,i3,i4); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) + return m_map.reference(i0,i1,i2,i3,i4); } template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > KOKKOS_INLINE_FUNCTION typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank() , m_track.template get_label().c_str(),m_map,i0,i1,i2,i3,i4) ) - #endif - return m_map.reference(i0,i1,i2,i3,i4,0,0); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) ) + return m_map.reference(i0,i1,i2,i3,i4,0,0); } // Rank 6 template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && 
std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label().c_str(),m_map,i0,i1,i2,i3,i4,i5) ) - #endif - return m_map.reference(i0,i1,i2,i3,i4,i5); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) + return m_map.reference(i0,i1,i2,i3,i4,i5); } template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > KOKKOS_INLINE_FUNCTION typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank() , m_track.template get_label().c_str(),m_map,i0,i1,i2,i3,i4,i5) ) - #endif - return m_map.reference(i0,i1,i2,i3,i4,i5,0); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,0); } // Rank 7 template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 
, typename iType6 > KOKKOS_INLINE_FUNCTION typename std::enable_if< (std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type - operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const - { - #ifndef KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , NULL , m_map , i0 , i1 , i2 , i3, i4 , i5 , i6) ) - #else - KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank() , m_track.template get_label().c_str(),m_map,i0,i1,i2,i3,i4,i5,i6) ) - #endif - return m_map.reference(i0,i1,i2,i3,i4,i5,i6); + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const + { + KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5, i6) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,i6); } #undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY //---------------------------------------- - // Standard constructor, destructor, and assignment operators... + // Standard constructor, destructor, and assignment operators... 
KOKKOS_INLINE_FUNCTION ~DynRankView() {} @@ -840,7 +790,7 @@ public: DynRankView & operator = ( const DynRankView & rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } KOKKOS_INLINE_FUNCTION - DynRankView & operator = ( DynRankView && rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } + DynRankView & operator = ( DynRankView && rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } //---------------------------------------- // Compatible view copy constructor and assignment @@ -1068,7 +1018,7 @@ public: DynRankView( const Label & arg_label , typename std::enable_if< Kokkos::Experimental::Impl::is_view_label