From 39be4185c40bca200eacc9fb1b8b8bc49f41106e Mon Sep 17 00:00:00 2001 From: stamoor Date: Tue, 6 Sep 2016 23:06:32 +0000 Subject: [PATCH] Updating Kokkos lib git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@15556 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- lib/kokkos/.gitignore | 8 + lib/kokkos/CMakeLists.txt | 184 + lib/kokkos/Copyright.txt | 40 + lib/kokkos/HOW_TO_SNAPSHOT | 73 + lib/kokkos/LICENSE | 40 + lib/kokkos/Makefile.kokkos | 480 + lib/kokkos/Makefile.targets | 72 + lib/kokkos/README | 152 + lib/kokkos/algorithms/CMakeLists.txt | 10 + .../algorithms/cmake/Dependencies.cmake | 5 + .../cmake/KokkosAlgorithms_config.h.in | 4 + lib/kokkos/algorithms/src/CMakeLists.txt | 21 + .../algorithms/src/KokkosAlgorithms_dummy.cpp | 0 lib/kokkos/algorithms/src/Kokkos_Random.hpp | 1751 ++ lib/kokkos/algorithms/src/Kokkos_Sort.hpp | 496 + .../algorithms/unit_tests/CMakeLists.txt | 38 + lib/kokkos/algorithms/unit_tests/Makefile | 92 + lib/kokkos/algorithms/unit_tests/TestCuda.cpp | 110 + .../algorithms/unit_tests/TestOpenMP.cpp | 102 + .../algorithms/unit_tests/TestRandom.hpp | 481 + .../algorithms/unit_tests/TestSerial.cpp | 99 + lib/kokkos/algorithms/unit_tests/TestSort.hpp | 206 + .../algorithms/unit_tests/TestThreads.cpp | 113 + .../algorithms/unit_tests/UnitTestMain.cpp | 50 + lib/kokkos/cmake/Dependencies.cmake | 10 + lib/kokkos/cmake/deps/CUDA.cmake | 79 + lib/kokkos/cmake/deps/CUSPARSE.cmake | 64 + lib/kokkos/cmake/deps/HWLOC.cmake | 70 + lib/kokkos/cmake/deps/Pthread.cmake | 83 + lib/kokkos/cmake/deps/QTHREAD.cmake | 70 + lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake | 75 + lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake | 71 + lib/kokkos/cmake/tpls/FindTPLPthread.cmake | 82 + lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake | 70 + lib/kokkos/cmake/tribits.cmake | 485 + lib/kokkos/config/configure_compton_cpu.sh | 190 + lib/kokkos/config/configure_compton_mic.sh | 186 + lib/kokkos/config/configure_kokkos.sh | 293 + lib/kokkos/config/configure_kokkos_bgq.sh | 88 + 
lib/kokkos/config/configure_kokkos_dev.sh | 216 + lib/kokkos/config/configure_kokkos_nvidia.sh | 204 + lib/kokkos/config/configure_shannon.sh | 190 + ...nfigure_tpetra_kokkos_cuda_nvcc_wrapper.sh | 140 + .../kokkos-trilinos-integration-procedure.txt | 153 + .../config/kokkos_dev/config-core-all.sh | 113 + .../kokkos_dev/config-core-cuda-omp-hwloc.sh | 104 + .../config/kokkos_dev/config-core-cuda.sh | 88 + .../kokkos_dev/config-core-cxx11-omp.sh | 84 + .../config/kokkos_dev/config-core-dbg-none.sh | 78 + .../kokkos_dev/config-core-intel-cuda-omp.sh | 89 + .../kokkos_dev/config-core-intel-omp.sh | 84 + .../config/kokkos_dev/config-core-omp.sh | 77 + .../kokkos_dev/config-core-threads-hwloc.sh | 87 + lib/kokkos/config/master_history.txt | 3 + lib/kokkos/config/nvcc_wrapper | 280 + lib/kokkos/config/snapshot.py | 279 + lib/kokkos/config/test_all_sandia | 539 + lib/kokkos/config/testing_scripts/README | 5 + .../testing_scripts/jenkins_test_driver | 83 + .../config/testing_scripts/obj_size_opt_check | 287 + lib/kokkos/containers/CMakeLists.txt | 10 + .../containers/cmake/Dependencies.cmake | 5 + .../cmake/KokkosContainers_config.h.in | 4 + .../performance_tests/CMakeLists.txt | 37 + .../containers/performance_tests/Makefile | 81 + .../containers/performance_tests/TestCuda.cpp | 109 + .../performance_tests/TestDynRankView.hpp | 265 + .../performance_tests/TestGlobal2LocalIds.hpp | 231 + .../containers/performance_tests/TestMain.cpp | 50 + .../performance_tests/TestOpenMP.cpp | 140 + .../performance_tests/TestThreads.cpp | 135 + .../TestUnorderedMapPerformance.hpp | 262 + lib/kokkos/containers/src/CMakeLists.txt | 31 + lib/kokkos/containers/src/Kokkos_Bitset.hpp | 437 + lib/kokkos/containers/src/Kokkos_DualView.hpp | 982 + .../containers/src/Kokkos_DynRankView.hpp | 1834 ++ .../containers/src/Kokkos_DynamicView.hpp | 494 + .../containers/src/Kokkos_Functional.hpp | 173 + .../containers/src/Kokkos_SegmentedView.hpp | 531 + .../containers/src/Kokkos_StaticCrsGraph.hpp | 226 
+ .../containers/src/Kokkos_UnorderedMap.hpp | 848 + lib/kokkos/containers/src/Kokkos_Vector.hpp | 283 + .../src/impl/Kokkos_Bitset_impl.hpp | 109 + .../src/impl/Kokkos_Functional_impl.hpp | 195 + .../impl/Kokkos_StaticCrsGraph_factory.hpp | 208 + .../src/impl/Kokkos_UnorderedMap_impl.cpp | 101 + .../src/impl/Kokkos_UnorderedMap_impl.hpp | 297 + .../containers/unit_tests/CMakeLists.txt | 40 + lib/kokkos/containers/unit_tests/Makefile | 92 + .../containers/unit_tests/TestBitset.hpp | 285 + .../containers/unit_tests/TestComplex.hpp | 263 + lib/kokkos/containers/unit_tests/TestCuda.cpp | 227 + .../containers/unit_tests/TestDualView.hpp | 121 + .../containers/unit_tests/TestDynViewAPI.hpp | 1559 ++ .../containers/unit_tests/TestDynamicView.hpp | 168 + .../containers/unit_tests/TestOpenMP.cpp | 182 + .../unit_tests/TestSegmentedView.hpp | 708 + .../containers/unit_tests/TestSerial.cpp | 175 + .../unit_tests/TestStaticCrsGraph.hpp | 149 + .../containers/unit_tests/TestThreads.cpp | 188 + .../unit_tests/TestUnorderedMap.hpp | 313 + .../containers/unit_tests/TestVector.hpp | 131 + .../containers/unit_tests/UnitTestMain.cpp | 50 + lib/kokkos/core/CMakeLists.txt | 11 + lib/kokkos/core/cmake/Dependencies.cmake | 4 + lib/kokkos/core/cmake/KokkosCore_config.h.in | 57 + lib/kokkos/core/perf_test/CMakeLists.txt | 29 + lib/kokkos/core/perf_test/Makefile | 66 + .../core/perf_test/PerfTestBlasKernels.hpp | 309 + lib/kokkos/core/perf_test/PerfTestCuda.cpp | 189 + lib/kokkos/core/perf_test/PerfTestDriver.hpp | 152 + .../core/perf_test/PerfTestGramSchmidt.hpp | 226 + lib/kokkos/core/perf_test/PerfTestHexGrad.hpp | 268 + lib/kokkos/core/perf_test/PerfTestHost.cpp | 104 + lib/kokkos/core/perf_test/PerfTestMain.cpp | 49 + lib/kokkos/core/perf_test/test_atomic.cpp | 507 + lib/kokkos/core/src/CMakeLists.txt | 113 + .../core/src/Cuda/KokkosExp_Cuda_View.hpp | 334 + lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp | 318 + lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp | 829 + 
.../core/src/Cuda/Kokkos_Cuda_Alloc.hpp | 182 + .../core/src/Cuda/Kokkos_Cuda_Error.hpp | 69 + lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp | 778 + .../core/src/Cuda/Kokkos_Cuda_Internal.hpp | 202 + .../core/src/Cuda/Kokkos_Cuda_Parallel.hpp | 1926 ++ .../core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp | 433 + lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp | 179 + lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp | 519 + .../core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp | 932 + .../core/src/Cuda/Kokkos_Cuda_TaskPolicy.hpp | 833 + .../src/Cuda/Kokkos_Cuda_Vectorization.hpp | 298 + lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp | 93 + .../core/src/Cuda/Kokkos_Cuda_abort.hpp | 119 + .../core/src/KokkosExp_MDRangePolicy.hpp | 611 + lib/kokkos/core/src/Kokkos_Array.hpp | 302 + lib/kokkos/core/src/Kokkos_Atomic.hpp | 305 + lib/kokkos/core/src/Kokkos_Complex.hpp | 538 + lib/kokkos/core/src/Kokkos_Concepts.hpp | 78 + lib/kokkos/core/src/Kokkos_Core.hpp | 174 + lib/kokkos/core/src/Kokkos_Core_fwd.hpp | 247 + lib/kokkos/core/src/Kokkos_Cuda.hpp | 274 + lib/kokkos/core/src/Kokkos_CudaSpace.hpp | 802 + lib/kokkos/core/src/Kokkos_ExecPolicy.hpp | 570 + lib/kokkos/core/src/Kokkos_HBWSpace.hpp | 312 + lib/kokkos/core/src/Kokkos_HostSpace.hpp | 275 + lib/kokkos/core/src/Kokkos_Layout.hpp | 233 + lib/kokkos/core/src/Kokkos_Macros.hpp | 470 + lib/kokkos/core/src/Kokkos_MemoryPool.hpp | 1523 ++ lib/kokkos/core/src/Kokkos_MemoryTraits.hpp | 116 + lib/kokkos/core/src/Kokkos_OpenMP.hpp | 189 + lib/kokkos/core/src/Kokkos_Pair.hpp | 530 + lib/kokkos/core/src/Kokkos_Parallel.hpp | 527 + .../core/src/Kokkos_Parallel_Reduce.hpp | 1240 + lib/kokkos/core/src/Kokkos_Qthread.hpp | 172 + lib/kokkos/core/src/Kokkos_ScratchSpace.hpp | 166 + lib/kokkos/core/src/Kokkos_Serial.hpp | 1116 + lib/kokkos/core/src/Kokkos_TaskPolicy.hpp | 1109 + lib/kokkos/core/src/Kokkos_Threads.hpp | 222 + lib/kokkos/core/src/Kokkos_Vectorization.hpp | 53 + lib/kokkos/core/src/Kokkos_View.hpp | 2384 ++ lib/kokkos/core/src/Kokkos_hwloc.hpp 
| 144 + lib/kokkos/core/src/Makefile | 124 + .../src/OpenMP/Kokkos_OpenMP_Parallel.hpp | 750 + .../core/src/OpenMP/Kokkos_OpenMP_Task.cpp | 329 + .../core/src/OpenMP/Kokkos_OpenMP_Task.hpp | 356 + .../core/src/OpenMP/Kokkos_OpenMPexec.cpp | 408 + .../core/src/OpenMP/Kokkos_OpenMPexec.hpp | 1083 + .../core/src/Qthread/Kokkos_QthreadExec.cpp | 511 + .../core/src/Qthread/Kokkos_QthreadExec.hpp | 620 + .../src/Qthread/Kokkos_Qthread_Parallel.hpp | 745 + .../src/Qthread/Kokkos_Qthread_TaskPolicy.cpp | 491 + .../src/Qthread/Kokkos_Qthread_TaskPolicy.hpp | 664 + lib/kokkos/core/src/Qthread/README | 25 + .../core/src/Threads/Kokkos_ThreadsExec.cpp | 826 + .../core/src/Threads/Kokkos_ThreadsExec.hpp | 631 + .../src/Threads/Kokkos_ThreadsExec_base.cpp | 255 + .../core/src/Threads/Kokkos_ThreadsTeam.hpp | 932 + .../src/Threads/Kokkos_Threads_Parallel.hpp | 658 + .../src/Threads/Kokkos_Threads_TaskPolicy.cpp | 930 + .../src/Threads/Kokkos_Threads_TaskPolicy.hpp | 745 + lib/kokkos/core/src/impl/CMakeLists.txt | 18 + .../core/src/impl/KokkosExp_SharedAlloc.cpp | 346 + .../core/src/impl/KokkosExp_SharedAlloc.hpp | 400 + .../core/src/impl/KokkosExp_ViewArray.hpp | 606 + .../core/src/impl/KokkosExp_ViewCtor.hpp | 252 + .../core/src/impl/KokkosExp_ViewMapping.hpp | 2932 +++ .../core/src/impl/KokkosExp_ViewTile.hpp | 227 + .../core/src/impl/Kokkos_AnalyzePolicy.hpp | 197 + .../core/src/impl/Kokkos_AnalyzeShape.hpp | 260 + .../core/src/impl/Kokkos_Atomic_Assembly.hpp | 112 + .../Kokkos_Atomic_Compare_Exchange_Strong.hpp | 271 + .../core/src/impl/Kokkos_Atomic_Decrement.hpp | 117 + .../core/src/impl/Kokkos_Atomic_Exchange.hpp | 359 + .../core/src/impl/Kokkos_Atomic_Fetch_Add.hpp | 340 + .../core/src/impl/Kokkos_Atomic_Fetch_And.hpp | 125 + .../core/src/impl/Kokkos_Atomic_Fetch_Or.hpp | 125 + .../core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp | 235 + .../core/src/impl/Kokkos_Atomic_Generic.hpp | 419 + .../core/src/impl/Kokkos_Atomic_Increment.hpp | 117 + 
.../core/src/impl/Kokkos_Atomic_View.hpp | 430 + .../core/src/impl/Kokkos_Atomic_Windows.hpp | 232 + lib/kokkos/core/src/impl/Kokkos_BitOps.hpp | 122 + .../core/src/impl/Kokkos_CPUDiscovery.cpp | 124 + .../core/src/impl/Kokkos_CPUDiscovery.hpp | 51 + lib/kokkos/core/src/impl/Kokkos_Core.cpp | 454 + lib/kokkos/core/src/impl/Kokkos_Error.cpp | 193 + lib/kokkos/core/src/impl/Kokkos_Error.hpp | 82 + .../core/src/impl/Kokkos_ExecPolicy.cpp | 19 + .../core/src/impl/Kokkos_FunctorAdapter.hpp | 1131 + .../core/src/impl/Kokkos_HBWAllocators.cpp | 108 + .../core/src/impl/Kokkos_HBWAllocators.hpp | 75 + lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp | 379 + lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp | 537 + .../core/src/impl/Kokkos_Memory_Fence.hpp | 107 + .../core/src/impl/Kokkos_PhysicalLayout.hpp | 73 + .../src/impl/Kokkos_Profiling_DeviceInfo.hpp | 57 + .../src/impl/Kokkos_Profiling_Interface.cpp | 186 + .../src/impl/Kokkos_Profiling_Interface.hpp | 118 + lib/kokkos/core/src/impl/Kokkos_Serial.cpp | 119 + .../core/src/impl/Kokkos_Serial_Task.cpp | 147 + .../core/src/impl/Kokkos_Serial_Task.hpp | 271 + .../src/impl/Kokkos_Serial_TaskPolicy.cpp | 348 + .../src/impl/Kokkos_Serial_TaskPolicy.hpp | 677 + lib/kokkos/core/src/impl/Kokkos_Shape.cpp | 178 + lib/kokkos/core/src/impl/Kokkos_Shape.hpp | 917 + lib/kokkos/core/src/impl/Kokkos_Singleton.hpp | 55 + .../core/src/impl/Kokkos_StaticAssert.hpp | 79 + .../core/src/impl/Kokkos_Synchronic.hpp | 693 + .../src/impl/Kokkos_Synchronic_Config.hpp | 169 + .../core/src/impl/Kokkos_Synchronic_n3998.hpp | 162 + lib/kokkos/core/src/impl/Kokkos_Tags.hpp | 198 + lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp | 499 + .../core/src/impl/Kokkos_TaskQueue_impl.hpp | 569 + lib/kokkos/core/src/impl/Kokkos_Timer.hpp | 118 + lib/kokkos/core/src/impl/Kokkos_Traits.hpp | 501 + .../core/src/impl/Kokkos_ViewDefault.hpp | 886 + .../core/src/impl/Kokkos_ViewOffset.hpp | 1341 ++ .../core/src/impl/Kokkos_ViewSupport.hpp | 393 + 
.../core/src/impl/Kokkos_ViewTileLeft.hpp | 56 + .../core/src/impl/Kokkos_Volatile_Load.hpp | 242 + lib/kokkos/core/src/impl/Kokkos_hwloc.cpp | 726 + lib/kokkos/core/src/impl/Kokkos_spinwait.cpp | 89 + lib/kokkos/core/src/impl/Kokkos_spinwait.hpp | 64 + lib/kokkos/core/unit_test/CMakeLists.txt | 105 + lib/kokkos/core/unit_test/Makefile | 153 + lib/kokkos/core/unit_test/TestAggregate.hpp | 109 + .../core/unit_test/TestAggregateReduction.hpp | 191 + lib/kokkos/core/unit_test/TestAtomic.hpp | 402 + .../core/unit_test/TestAtomicOperations.hpp | 841 + lib/kokkos/core/unit_test/TestCXX11.hpp | 334 + .../core/unit_test/TestCXX11Deduction.hpp | 94 + .../core/unit_test/TestCompilerMacros.hpp | 93 + lib/kokkos/core/unit_test/TestCuda.cpp | 290 + lib/kokkos/core/unit_test/TestCuda_a.cpp | 182 + lib/kokkos/core/unit_test/TestCuda_b.cpp | 191 + lib/kokkos/core/unit_test/TestCuda_c.cpp | 375 + .../core/unit_test/TestDefaultDeviceType.cpp | 242 + .../unit_test/TestDefaultDeviceTypeInit.hpp | 419 + .../unit_test/TestDefaultDeviceTypeInit_1.cpp | 2 + .../TestDefaultDeviceTypeInit_10.cpp | 2 + .../TestDefaultDeviceTypeInit_11.cpp | 2 + .../TestDefaultDeviceTypeInit_12.cpp | 2 + .../TestDefaultDeviceTypeInit_13.cpp | 2 + .../TestDefaultDeviceTypeInit_14.cpp | 2 + .../TestDefaultDeviceTypeInit_15.cpp | 2 + .../TestDefaultDeviceTypeInit_16.cpp | 2 + .../unit_test/TestDefaultDeviceTypeInit_2.cpp | 2 + .../unit_test/TestDefaultDeviceTypeInit_3.cpp | 2 + .../unit_test/TestDefaultDeviceTypeInit_4.cpp | 2 + .../unit_test/TestDefaultDeviceTypeInit_5.cpp | 2 + .../unit_test/TestDefaultDeviceTypeInit_6.cpp | 2 + .../unit_test/TestDefaultDeviceTypeInit_7.cpp | 2 + .../unit_test/TestDefaultDeviceTypeInit_8.cpp | 2 + .../unit_test/TestDefaultDeviceTypeInit_9.cpp | 2 + .../unit_test/TestDefaultDeviceType_a.cpp | 76 + lib/kokkos/core/unit_test/TestHWLOC.cpp | 69 + lib/kokkos/core/unit_test/TestMDRange.hpp | 555 + lib/kokkos/core/unit_test/TestMemoryPool.hpp | 820 + 
.../unit_test/TestMemorySpaceTracking.hpp | 100 + lib/kokkos/core/unit_test/TestOpenMP.cpp | 262 + lib/kokkos/core/unit_test/TestOpenMP_a.cpp | 150 + lib/kokkos/core/unit_test/TestOpenMP_b.cpp | 185 + lib/kokkos/core/unit_test/TestOpenMP_c.cpp | 262 + .../core/unit_test/TestPolicyConstruction.hpp | 493 + lib/kokkos/core/unit_test/TestQthread.cpp | 290 + lib/kokkos/core/unit_test/TestRange.hpp | 242 + lib/kokkos/core/unit_test/TestReduce.hpp | 1872 ++ lib/kokkos/core/unit_test/TestScan.hpp | 103 + lib/kokkos/core/unit_test/TestSerial.cpp | 571 + lib/kokkos/core/unit_test/TestSharedAlloc.hpp | 215 + lib/kokkos/core/unit_test/TestSynchronic.cpp | 448 + lib/kokkos/core/unit_test/TestSynchronic.hpp | 240 + lib/kokkos/core/unit_test/TestTaskPolicy.hpp | 1145 + lib/kokkos/core/unit_test/TestTeam.hpp | 910 + lib/kokkos/core/unit_test/TestTeamVector.hpp | 646 + .../unit_test/TestTemplateMetaFunctions.hpp | 219 + lib/kokkos/core/unit_test/TestThreads.cpp | 614 + lib/kokkos/core/unit_test/TestTile.hpp | 153 + lib/kokkos/core/unit_test/TestViewAPI.hpp | 1416 ++ lib/kokkos/core/unit_test/TestViewImpl.hpp | 289 + lib/kokkos/core/unit_test/TestViewMapping.hpp | 1307 + lib/kokkos/core/unit_test/TestViewOfClass.hpp | 163 + lib/kokkos/core/unit_test/TestViewSubview.hpp | 874 + lib/kokkos/core/unit_test/UnitTestMain.cpp | 50 + lib/kokkos/doc/Doxyfile | 127 + lib/kokkos/doc/Kokkos_PG.pdf | Bin 0 -> 1359256 bytes lib/kokkos/doc/README | 32 + lib/kokkos/doc/build_docs | 15 + lib/kokkos/doc/index.doc | 72 + lib/kokkos/example/CMakeLists.txt | 20 + lib/kokkos/example/README | 16 + lib/kokkos/example/cmake/Dependencies.cmake | 4 + lib/kokkos/example/common/VectorImport.hpp | 294 + lib/kokkos/example/common/WrapMPI.hpp | 103 + lib/kokkos/example/feint/CMakeLists.txt | 18 + lib/kokkos/example/feint/ElemFunctor.hpp | 489 + lib/kokkos/example/feint/Makefile | 61 + lib/kokkos/example/feint/feint.hpp | 165 + lib/kokkos/example/feint/feint_cuda.cpp | 67 + lib/kokkos/example/feint/feint_fwd.hpp | 
60 + lib/kokkos/example/feint/feint_openmp.cpp | 67 + lib/kokkos/example/feint/feint_threads.cpp | 66 + lib/kokkos/example/feint/main.cpp | 110 + lib/kokkos/example/fenl/CGSolve.hpp | 296 + lib/kokkos/example/fenl/CMakeLists.txt | 17 + lib/kokkos/example/fenl/Makefile | 54 + lib/kokkos/example/fenl/fenl.cpp | 117 + lib/kokkos/example/fenl/fenl.hpp | 89 + lib/kokkos/example/fenl/fenl_functors.hpp | 1173 + lib/kokkos/example/fenl/fenl_impl.hpp | 598 + lib/kokkos/example/fenl/main.cpp | 422 + lib/kokkos/example/fixture/BoxElemFixture.hpp | 355 + lib/kokkos/example/fixture/BoxElemPart.cpp | 413 + lib/kokkos/example/fixture/BoxElemPart.hpp | 320 + lib/kokkos/example/fixture/CMakeLists.txt | 13 + lib/kokkos/example/fixture/HexElement.hpp | 270 + lib/kokkos/example/fixture/Main.cpp | 304 + lib/kokkos/example/fixture/Makefile | 48 + lib/kokkos/example/fixture/TestFixture.cpp | 58 + lib/kokkos/example/fixture/TestFixture.hpp | 156 + .../example/global_2_local_ids/CMakeLists.txt | 17 + lib/kokkos/example/global_2_local_ids/G2L.hpp | 266 + .../example/global_2_local_ids/G2L_Main.cpp | 149 + .../example/global_2_local_ids/Makefile | 53 + lib/kokkos/example/grow_array/CMakeLists.txt | 14 + lib/kokkos/example/grow_array/Makefile | 53 + lib/kokkos/example/grow_array/grow_array.hpp | 257 + lib/kokkos/example/grow_array/main.cpp | 110 + lib/kokkos/example/ichol/Makefile | 63 + .../example_chol_performance_device.hpp | 240 + .../example_chol_performance_device_cuda.cpp | 70 + ...xample_chol_performance_device_pthread.cpp | 67 + lib/kokkos/example/ichol/src/chol.hpp | 92 + lib/kokkos/example/ichol/src/chol_u.hpp | 23 + .../ichol/src/chol_u_right_look_by_blocks.hpp | 394 + .../ichol/src/chol_u_unblocked_opt1.hpp | 90 + .../ichol/src/chol_u_unblocked_opt2.hpp | 154 + lib/kokkos/example/ichol/src/control.hpp | 110 + lib/kokkos/example/ichol/src/coo.hpp | 75 + .../example/ichol/src/crs_matrix_base.hpp | 598 + .../ichol/src/crs_matrix_base_import.hpp | 104 + 
.../example/ichol/src/crs_matrix_helper.hpp | 71 + .../ichol/src/crs_matrix_helper_impl.hpp | 364 + .../example/ichol/src/crs_matrix_view.hpp | 226 + lib/kokkos/example/ichol/src/crs_row_view.hpp | 185 + lib/kokkos/example/ichol/src/dot.hpp | 74 + lib/kokkos/example/ichol/src/gemm.hpp | 99 + lib/kokkos/example/ichol/src/gemm_ct_nt.hpp | 12 + .../src/gemm_ct_nt_for_factor_blocked.hpp | 108 + .../example/ichol/src/graph_helper_scotch.hpp | 427 + lib/kokkos/example/ichol/src/herk.hpp | 91 + lib/kokkos/example/ichol/src/herk_u_ct.hpp | 11 + .../src/herk_u_ct_for_factor_blocked.hpp | 103 + lib/kokkos/example/ichol/src/norm.hpp | 82 + lib/kokkos/example/ichol/src/partition.hpp | 381 + lib/kokkos/example/ichol/src/scale.hpp | 92 + .../ichol/src/symbolic_factor_helper.hpp | 379 + .../example/ichol/src/symbolic_task.hpp | 118 + lib/kokkos/example/ichol/src/task_factory.hpp | 77 + lib/kokkos/example/ichol/src/task_view.hpp | 104 + lib/kokkos/example/ichol/src/trsm.hpp | 92 + lib/kokkos/example/ichol/src/trsm_l_u_ct.hpp | 14 + .../src/trsm_l_u_ct_for_factor_blocked.hpp | 185 + lib/kokkos/example/ichol/src/util.cpp | 4 + lib/kokkos/example/ichol/src/util.hpp | 237 + lib/kokkos/example/md_skeleton/CMakeLists.txt | 16 + lib/kokkos/example/md_skeleton/Makefile | 53 + lib/kokkos/example/md_skeleton/README | 3 + lib/kokkos/example/md_skeleton/force.cpp | 192 + lib/kokkos/example/md_skeleton/main.cpp | 205 + lib/kokkos/example/md_skeleton/neighbor.cpp | 430 + lib/kokkos/example/md_skeleton/setup.cpp | 271 + lib/kokkos/example/md_skeleton/system.h | 92 + lib/kokkos/example/md_skeleton/types.h | 118 + .../example/multi_fem/BoxMeshFixture.hpp | 610 + .../example/multi_fem/BoxMeshPartition.cpp | 381 + .../example/multi_fem/BoxMeshPartition.hpp | 210 + lib/kokkos/example/multi_fem/CMakeLists.txt | 16 + lib/kokkos/example/multi_fem/Explicit.hpp | 452 + .../example/multi_fem/ExplicitFunctors.hpp | 1471 ++ lib/kokkos/example/multi_fem/FEMesh.hpp | 86 + 
lib/kokkos/example/multi_fem/HexElement.hpp | 268 + .../multi_fem/HexExplicitFunctions.hpp | 443 + lib/kokkos/example/multi_fem/Implicit.hpp | 341 + .../example/multi_fem/ImplicitFunctors.hpp | 585 + lib/kokkos/example/multi_fem/LinAlgBLAS.hpp | 567 + lib/kokkos/example/multi_fem/Makefile | 53 + lib/kokkos/example/multi_fem/Nonlinear.hpp | 573 + .../multi_fem/NonlinearElement_Cuda.hpp | 390 + .../example/multi_fem/NonlinearFunctors.hpp | 482 + lib/kokkos/example/multi_fem/ParallelComm.hpp | 167 + .../example/multi_fem/ParallelDataMap.hpp | 517 + .../example/multi_fem/ParallelMachine.cpp | 178 + .../example/multi_fem/ParallelMachine.hpp | 118 + .../example/multi_fem/SparseLinearSystem.hpp | 400 + .../multi_fem/SparseLinearSystemFill.hpp | 276 + .../multi_fem/SparseLinearSystem_Cuda.hpp | 164 + .../example/multi_fem/TestBoxMeshFixture.hpp | 242 + .../multi_fem/TestBoxMeshPartition.cpp | 172 + lib/kokkos/example/multi_fem/TestCuda.cpp | 192 + lib/kokkos/example/multi_fem/TestHost.cpp | 137 + .../example/multi_fem/TestHybridFEM.cpp | 348 + .../example/query_device/CMakeLists.txt | 14 + lib/kokkos/example/query_device/Makefile | 53 + .../example/query_device/query_device.cpp | 100 + lib/kokkos/example/sort_array/CMakeLists.txt | 14 + lib/kokkos/example/sort_array/Makefile | 53 + lib/kokkos/example/sort_array/main.cpp | 95 + lib/kokkos/example/sort_array/sort_array.hpp | 190 + .../tutorial/01_hello_world/CMakeLists.txt | 11 + .../example/tutorial/01_hello_world/Makefile | 43 + .../tutorial/01_hello_world/hello_world.cpp | 130 + .../01_hello_world_lambda/CMakeLists.txt | 13 + .../tutorial/01_hello_world_lambda/Makefile | 44 + .../hello_world_lambda.cpp | 109 + .../tutorial/02_simple_reduce/CMakeLists.txt | 10 + .../tutorial/02_simple_reduce/Makefile | 43 + .../02_simple_reduce/simple_reduce.cpp | 101 + .../02_simple_reduce_lambda/CMakeLists.txt | 12 + .../tutorial/02_simple_reduce_lambda/Makefile | 44 + .../simple_reduce_lambda.cpp | 86 + 
.../tutorial/03_simple_view/CMakeLists.txt | 10 + .../example/tutorial/03_simple_view/Makefile | 43 + .../tutorial/03_simple_view/simple_view.cpp | 142 + .../03_simple_view_lambda/CMakeLists.txt | 12 + .../tutorial/03_simple_view_lambda/Makefile | 44 + .../simple_view_lambda.cpp | 116 + .../04_simple_memoryspaces/CMakeLists.txt | 10 + .../tutorial/04_simple_memoryspaces/Makefile | 43 + .../simple_memoryspaces.cpp | 101 + .../tutorial/05_simple_atomics/CMakeLists.txt | 10 + .../tutorial/05_simple_atomics/Makefile | 43 + .../05_simple_atomics/simple_atomics.cpp | 137 + .../01_data_layouts/CMakeLists.txt | 10 + .../Advanced_Views/01_data_layouts/Makefile | 43 + .../01_data_layouts/data_layouts.cpp | 171 + .../02_memory_traits/CMakeLists.txt | 10 + .../Advanced_Views/02_memory_traits/Makefile | 43 + .../02_memory_traits/memory_traits.cpp | 141 + .../Advanced_Views/03_subviews/CMakeLists.txt | 10 + .../Advanced_Views/03_subviews/Makefile | 43 + .../Advanced_Views/03_subviews/subviews.cpp | 190 + .../04_dualviews/CMakeLists.txt | 10 + .../Advanced_Views/04_dualviews/Makefile | 43 + .../Advanced_Views/04_dualviews/dual_view.cpp | 218 + .../05_NVIDIA_UVM/CMakeLists.txt | 13 + .../Advanced_Views/05_NVIDIA_UVM/Makefile | 43 + .../05_NVIDIA_UVM/uvm_example.cpp | 134 + .../Advanced_Views/06_AtomicViews/Makefile | 43 + .../07_Overlapping_DeepCopy/Makefile | 43 + .../overlapping_deepcopy.cpp | 148 + .../tutorial/Advanced_Views/CMakeLists.txt | 9 + .../example/tutorial/Advanced_Views/Makefile | 84 + .../Algorithms/01_random_numbers/Makefile | 43 + .../01_random_numbers/random_numbers.cpp | 152 + .../example/tutorial/Algorithms/Makefile | 24 + lib/kokkos/example/tutorial/CMakeLists.txt | 17 + .../01_thread_teams/CMakeLists.txt | 10 + .../01_thread_teams/Makefile | 43 + .../01_thread_teams/thread_teams.cpp | 94 + .../01_thread_teams_lambda/CMakeLists.txt | 13 + .../01_thread_teams_lambda/Makefile | 44 + .../thread_teams_lambda.cpp | 94 + .../02_nested_parallel_for/CMakeLists.txt | 
10 + .../02_nested_parallel_for/Makefile | 43 + .../nested_parallel_for.cpp | 89 + .../03_vectorization/CMakeLists.txt | 16 + .../03_vectorization/Makefile | 43 + .../03_vectorization/vectorization.cpp | 162 + .../04_team_scan/CMakeLists.txt | 10 + .../04_team_scan/Makefile | 43 + .../04_team_scan/team_scan.cpp | 141 + .../Hierarchical_Parallelism/CMakeLists.txt | 8 + .../Hierarchical_Parallelism/Makefile | 72 + lib/kokkos/example/tutorial/Makefile | 144 + lib/kokkos/example/tutorial/README | 17 + lib/kokkos/generate_makefile.bash | 336 + lib/kokkos/tpls/gtest/gtest/LICENSE | 28 + lib/kokkos/tpls/gtest/gtest/README | 13 + lib/kokkos/tpls/gtest/gtest/gtest-all.cc | 9594 ++++++++ lib/kokkos/tpls/gtest/gtest/gtest-test-part.h | 1 + lib/kokkos/tpls/gtest/gtest/gtest.h | 20065 ++++++++++++++++ 502 files changed, 157510 insertions(+) create mode 100644 lib/kokkos/.gitignore create mode 100644 lib/kokkos/CMakeLists.txt create mode 100644 lib/kokkos/Copyright.txt create mode 100644 lib/kokkos/HOW_TO_SNAPSHOT create mode 100644 lib/kokkos/LICENSE create mode 100644 lib/kokkos/Makefile.kokkos create mode 100644 lib/kokkos/Makefile.targets create mode 100644 lib/kokkos/README create mode 100644 lib/kokkos/algorithms/CMakeLists.txt create mode 100644 lib/kokkos/algorithms/cmake/Dependencies.cmake create mode 100644 lib/kokkos/algorithms/cmake/KokkosAlgorithms_config.h.in create mode 100644 lib/kokkos/algorithms/src/CMakeLists.txt create mode 100644 lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp create mode 100644 lib/kokkos/algorithms/src/Kokkos_Random.hpp create mode 100644 lib/kokkos/algorithms/src/Kokkos_Sort.hpp create mode 100644 lib/kokkos/algorithms/unit_tests/CMakeLists.txt create mode 100644 lib/kokkos/algorithms/unit_tests/Makefile create mode 100644 lib/kokkos/algorithms/unit_tests/TestCuda.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestRandom.hpp create mode 100644 
lib/kokkos/algorithms/unit_tests/TestSerial.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestSort.hpp create mode 100644 lib/kokkos/algorithms/unit_tests/TestThreads.cpp create mode 100644 lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp create mode 100644 lib/kokkos/cmake/Dependencies.cmake create mode 100644 lib/kokkos/cmake/deps/CUDA.cmake create mode 100644 lib/kokkos/cmake/deps/CUSPARSE.cmake create mode 100644 lib/kokkos/cmake/deps/HWLOC.cmake create mode 100644 lib/kokkos/cmake/deps/Pthread.cmake create mode 100644 lib/kokkos/cmake/deps/QTHREAD.cmake create mode 100644 lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake create mode 100644 lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake create mode 100644 lib/kokkos/cmake/tpls/FindTPLPthread.cmake create mode 100644 lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake create mode 100644 lib/kokkos/cmake/tribits.cmake create mode 100755 lib/kokkos/config/configure_compton_cpu.sh create mode 100755 lib/kokkos/config/configure_compton_mic.sh create mode 100755 lib/kokkos/config/configure_kokkos.sh create mode 100755 lib/kokkos/config/configure_kokkos_bgq.sh create mode 100755 lib/kokkos/config/configure_kokkos_dev.sh create mode 100755 lib/kokkos/config/configure_kokkos_nvidia.sh create mode 100755 lib/kokkos/config/configure_shannon.sh create mode 100755 lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh create mode 100644 lib/kokkos/config/kokkos-trilinos-integration-procedure.txt create mode 100755 lib/kokkos/config/kokkos_dev/config-core-all.sh create mode 100755 lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh create mode 100755 lib/kokkos/config/kokkos_dev/config-core-cuda.sh create mode 100755 lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh create mode 100755 lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh create mode 100755 lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh create mode 100755 lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh create mode 100755 
lib/kokkos/config/kokkos_dev/config-core-omp.sh create mode 100755 lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh create mode 100644 lib/kokkos/config/master_history.txt create mode 100755 lib/kokkos/config/nvcc_wrapper create mode 100755 lib/kokkos/config/snapshot.py create mode 100755 lib/kokkos/config/test_all_sandia create mode 100644 lib/kokkos/config/testing_scripts/README create mode 100755 lib/kokkos/config/testing_scripts/jenkins_test_driver create mode 100755 lib/kokkos/config/testing_scripts/obj_size_opt_check create mode 100644 lib/kokkos/containers/CMakeLists.txt create mode 100644 lib/kokkos/containers/cmake/Dependencies.cmake create mode 100644 lib/kokkos/containers/cmake/KokkosContainers_config.h.in create mode 100644 lib/kokkos/containers/performance_tests/CMakeLists.txt create mode 100644 lib/kokkos/containers/performance_tests/Makefile create mode 100644 lib/kokkos/containers/performance_tests/TestCuda.cpp create mode 100644 lib/kokkos/containers/performance_tests/TestDynRankView.hpp create mode 100644 lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp create mode 100644 lib/kokkos/containers/performance_tests/TestMain.cpp create mode 100644 lib/kokkos/containers/performance_tests/TestOpenMP.cpp create mode 100644 lib/kokkos/containers/performance_tests/TestThreads.cpp create mode 100644 lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp create mode 100644 lib/kokkos/containers/src/CMakeLists.txt create mode 100644 lib/kokkos/containers/src/Kokkos_Bitset.hpp create mode 100644 lib/kokkos/containers/src/Kokkos_DualView.hpp create mode 100644 lib/kokkos/containers/src/Kokkos_DynRankView.hpp create mode 100644 lib/kokkos/containers/src/Kokkos_DynamicView.hpp create mode 100644 lib/kokkos/containers/src/Kokkos_Functional.hpp create mode 100644 lib/kokkos/containers/src/Kokkos_SegmentedView.hpp create mode 100644 lib/kokkos/containers/src/Kokkos_StaticCrsGraph.hpp create mode 100644 
lib/kokkos/containers/src/Kokkos_UnorderedMap.hpp create mode 100644 lib/kokkos/containers/src/Kokkos_Vector.hpp create mode 100644 lib/kokkos/containers/src/impl/Kokkos_Bitset_impl.hpp create mode 100644 lib/kokkos/containers/src/impl/Kokkos_Functional_impl.hpp create mode 100644 lib/kokkos/containers/src/impl/Kokkos_StaticCrsGraph_factory.hpp create mode 100644 lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.cpp create mode 100644 lib/kokkos/containers/src/impl/Kokkos_UnorderedMap_impl.hpp create mode 100644 lib/kokkos/containers/unit_tests/CMakeLists.txt create mode 100644 lib/kokkos/containers/unit_tests/Makefile create mode 100644 lib/kokkos/containers/unit_tests/TestBitset.hpp create mode 100644 lib/kokkos/containers/unit_tests/TestComplex.hpp create mode 100644 lib/kokkos/containers/unit_tests/TestCuda.cpp create mode 100644 lib/kokkos/containers/unit_tests/TestDualView.hpp create mode 100644 lib/kokkos/containers/unit_tests/TestDynViewAPI.hpp create mode 100644 lib/kokkos/containers/unit_tests/TestDynamicView.hpp create mode 100644 lib/kokkos/containers/unit_tests/TestOpenMP.cpp create mode 100644 lib/kokkos/containers/unit_tests/TestSegmentedView.hpp create mode 100644 lib/kokkos/containers/unit_tests/TestSerial.cpp create mode 100644 lib/kokkos/containers/unit_tests/TestStaticCrsGraph.hpp create mode 100644 lib/kokkos/containers/unit_tests/TestThreads.cpp create mode 100644 lib/kokkos/containers/unit_tests/TestUnorderedMap.hpp create mode 100644 lib/kokkos/containers/unit_tests/TestVector.hpp create mode 100644 lib/kokkos/containers/unit_tests/UnitTestMain.cpp create mode 100644 lib/kokkos/core/CMakeLists.txt create mode 100644 lib/kokkos/core/cmake/Dependencies.cmake create mode 100644 lib/kokkos/core/cmake/KokkosCore_config.h.in create mode 100644 lib/kokkos/core/perf_test/CMakeLists.txt create mode 100644 lib/kokkos/core/perf_test/Makefile create mode 100644 lib/kokkos/core/perf_test/PerfTestBlasKernels.hpp create mode 100644 
lib/kokkos/core/perf_test/PerfTestCuda.cpp create mode 100644 lib/kokkos/core/perf_test/PerfTestDriver.hpp create mode 100644 lib/kokkos/core/perf_test/PerfTestGramSchmidt.hpp create mode 100644 lib/kokkos/core/perf_test/PerfTestHexGrad.hpp create mode 100644 lib/kokkos/core/perf_test/PerfTestHost.cpp create mode 100644 lib/kokkos/core/perf_test/PerfTestMain.cpp create mode 100644 lib/kokkos/core/perf_test/test_atomic.cpp create mode 100644 lib/kokkos/core/src/CMakeLists.txt create mode 100644 lib/kokkos/core/src/Cuda/KokkosExp_Cuda_View.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Alloc.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Error.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Internal.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_TaskPolicy.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_Vectorization.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp create mode 100644 lib/kokkos/core/src/Cuda/Kokkos_Cuda_abort.hpp create mode 100644 lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Array.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Atomic.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Complex.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Concepts.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Core.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Core_fwd.hpp create mode 100644 
lib/kokkos/core/src/Kokkos_Cuda.hpp create mode 100644 lib/kokkos/core/src/Kokkos_CudaSpace.hpp create mode 100644 lib/kokkos/core/src/Kokkos_ExecPolicy.hpp create mode 100644 lib/kokkos/core/src/Kokkos_HBWSpace.hpp create mode 100644 lib/kokkos/core/src/Kokkos_HostSpace.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Layout.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Macros.hpp create mode 100644 lib/kokkos/core/src/Kokkos_MemoryPool.hpp create mode 100644 lib/kokkos/core/src/Kokkos_MemoryTraits.hpp create mode 100644 lib/kokkos/core/src/Kokkos_OpenMP.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Pair.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Parallel.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Qthread.hpp create mode 100644 lib/kokkos/core/src/Kokkos_ScratchSpace.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Serial.hpp create mode 100644 lib/kokkos/core/src/Kokkos_TaskPolicy.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Threads.hpp create mode 100644 lib/kokkos/core/src/Kokkos_Vectorization.hpp create mode 100644 lib/kokkos/core/src/Kokkos_View.hpp create mode 100644 lib/kokkos/core/src/Kokkos_hwloc.hpp create mode 100644 lib/kokkos/core/src/Makefile create mode 100644 lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp create mode 100644 lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp create mode 100644 lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp create mode 100644 lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.cpp create mode 100644 lib/kokkos/core/src/OpenMP/Kokkos_OpenMPexec.hpp create mode 100644 lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.cpp create mode 100644 lib/kokkos/core/src/Qthread/Kokkos_QthreadExec.hpp create mode 100644 lib/kokkos/core/src/Qthread/Kokkos_Qthread_Parallel.hpp create mode 100644 lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp create mode 100644 lib/kokkos/core/src/Qthread/Kokkos_Qthread_TaskPolicy.hpp create mode 
100644 lib/kokkos/core/src/Qthread/README create mode 100644 lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp create mode 100644 lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp create mode 100644 lib/kokkos/core/src/Threads/Kokkos_ThreadsExec_base.cpp create mode 100644 lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp create mode 100644 lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp create mode 100644 lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp create mode 100644 lib/kokkos/core/src/Threads/Kokkos_Threads_TaskPolicy.hpp create mode 100644 lib/kokkos/core/src/impl/CMakeLists.txt create mode 100644 lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.cpp create mode 100644 lib/kokkos/core/src/impl/KokkosExp_SharedAlloc.hpp create mode 100644 lib/kokkos/core/src/impl/KokkosExp_ViewArray.hpp create mode 100644 lib/kokkos/core/src/impl/KokkosExp_ViewCtor.hpp create mode 100644 lib/kokkos/core/src/impl/KokkosExp_ViewMapping.hpp create mode 100644 lib/kokkos/core/src/impl/KokkosExp_ViewTile.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_AnalyzeShape.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Assembly.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Generic.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Atomic_View.hpp create mode 100644 
lib/kokkos/core/src/impl/Kokkos_Atomic_Windows.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_BitOps.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_CPUDiscovery.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Core.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Error.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Error.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_ExecPolicy.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_HBWAllocators.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Memory_Fence.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_PhysicalLayout.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Profiling_DeviceInfo.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Serial.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Serial_TaskPolicy.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Shape.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Shape.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Singleton.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_StaticAssert.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Synchronic.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Synchronic_Config.hpp create mode 100644 
lib/kokkos/core/src/impl/Kokkos_Synchronic_n3998.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Tags.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Timer.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Traits.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_ViewDefault.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_ViewOffset.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_ViewSupport.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_ViewTileLeft.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_Volatile_Load.hpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_hwloc.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_spinwait.cpp create mode 100644 lib/kokkos/core/src/impl/Kokkos_spinwait.hpp create mode 100644 lib/kokkos/core/unit_test/CMakeLists.txt create mode 100644 lib/kokkos/core/unit_test/Makefile create mode 100644 lib/kokkos/core/unit_test/TestAggregate.hpp create mode 100644 lib/kokkos/core/unit_test/TestAggregateReduction.hpp create mode 100644 lib/kokkos/core/unit_test/TestAtomic.hpp create mode 100644 lib/kokkos/core/unit_test/TestAtomicOperations.hpp create mode 100644 lib/kokkos/core/unit_test/TestCXX11.hpp create mode 100644 lib/kokkos/core/unit_test/TestCXX11Deduction.hpp create mode 100644 lib/kokkos/core/unit_test/TestCompilerMacros.hpp create mode 100644 lib/kokkos/core/unit_test/TestCuda.cpp create mode 100644 lib/kokkos/core/unit_test/TestCuda_a.cpp create mode 100644 lib/kokkos/core/unit_test/TestCuda_b.cpp create mode 100644 lib/kokkos/core/unit_test/TestCuda_c.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceType.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_1.cpp create mode 100644 
lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_10.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_11.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_12.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_13.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_14.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_15.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_16.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_2.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_3.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_4.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_5.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_6.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_7.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_8.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit_9.cpp create mode 100644 lib/kokkos/core/unit_test/TestDefaultDeviceType_a.cpp create mode 100644 lib/kokkos/core/unit_test/TestHWLOC.cpp create mode 100644 lib/kokkos/core/unit_test/TestMDRange.hpp create mode 100644 lib/kokkos/core/unit_test/TestMemoryPool.hpp create mode 100644 lib/kokkos/core/unit_test/TestMemorySpaceTracking.hpp create mode 100644 lib/kokkos/core/unit_test/TestOpenMP.cpp create mode 100644 lib/kokkos/core/unit_test/TestOpenMP_a.cpp create mode 100644 lib/kokkos/core/unit_test/TestOpenMP_b.cpp create mode 100644 lib/kokkos/core/unit_test/TestOpenMP_c.cpp create mode 100644 lib/kokkos/core/unit_test/TestPolicyConstruction.hpp create mode 100644 lib/kokkos/core/unit_test/TestQthread.cpp create mode 100644 lib/kokkos/core/unit_test/TestRange.hpp create mode 100644 lib/kokkos/core/unit_test/TestReduce.hpp create mode 100644 
lib/kokkos/core/unit_test/TestScan.hpp create mode 100644 lib/kokkos/core/unit_test/TestSerial.cpp create mode 100644 lib/kokkos/core/unit_test/TestSharedAlloc.hpp create mode 100644 lib/kokkos/core/unit_test/TestSynchronic.cpp create mode 100644 lib/kokkos/core/unit_test/TestSynchronic.hpp create mode 100644 lib/kokkos/core/unit_test/TestTaskPolicy.hpp create mode 100644 lib/kokkos/core/unit_test/TestTeam.hpp create mode 100644 lib/kokkos/core/unit_test/TestTeamVector.hpp create mode 100644 lib/kokkos/core/unit_test/TestTemplateMetaFunctions.hpp create mode 100644 lib/kokkos/core/unit_test/TestThreads.cpp create mode 100644 lib/kokkos/core/unit_test/TestTile.hpp create mode 100644 lib/kokkos/core/unit_test/TestViewAPI.hpp create mode 100644 lib/kokkos/core/unit_test/TestViewImpl.hpp create mode 100644 lib/kokkos/core/unit_test/TestViewMapping.hpp create mode 100644 lib/kokkos/core/unit_test/TestViewOfClass.hpp create mode 100644 lib/kokkos/core/unit_test/TestViewSubview.hpp create mode 100644 lib/kokkos/core/unit_test/UnitTestMain.cpp create mode 100644 lib/kokkos/doc/Doxyfile create mode 100644 lib/kokkos/doc/Kokkos_PG.pdf create mode 100644 lib/kokkos/doc/README create mode 100755 lib/kokkos/doc/build_docs create mode 100644 lib/kokkos/doc/index.doc create mode 100644 lib/kokkos/example/CMakeLists.txt create mode 100644 lib/kokkos/example/README create mode 100644 lib/kokkos/example/cmake/Dependencies.cmake create mode 100644 lib/kokkos/example/common/VectorImport.hpp create mode 100644 lib/kokkos/example/common/WrapMPI.hpp create mode 100644 lib/kokkos/example/feint/CMakeLists.txt create mode 100644 lib/kokkos/example/feint/ElemFunctor.hpp create mode 100644 lib/kokkos/example/feint/Makefile create mode 100644 lib/kokkos/example/feint/feint.hpp create mode 100644 lib/kokkos/example/feint/feint_cuda.cpp create mode 100644 lib/kokkos/example/feint/feint_fwd.hpp create mode 100644 lib/kokkos/example/feint/feint_openmp.cpp create mode 100644 
lib/kokkos/example/feint/feint_threads.cpp create mode 100644 lib/kokkos/example/feint/main.cpp create mode 100644 lib/kokkos/example/fenl/CGSolve.hpp create mode 100644 lib/kokkos/example/fenl/CMakeLists.txt create mode 100644 lib/kokkos/example/fenl/Makefile create mode 100644 lib/kokkos/example/fenl/fenl.cpp create mode 100644 lib/kokkos/example/fenl/fenl.hpp create mode 100644 lib/kokkos/example/fenl/fenl_functors.hpp create mode 100644 lib/kokkos/example/fenl/fenl_impl.hpp create mode 100644 lib/kokkos/example/fenl/main.cpp create mode 100644 lib/kokkos/example/fixture/BoxElemFixture.hpp create mode 100644 lib/kokkos/example/fixture/BoxElemPart.cpp create mode 100644 lib/kokkos/example/fixture/BoxElemPart.hpp create mode 100644 lib/kokkos/example/fixture/CMakeLists.txt create mode 100644 lib/kokkos/example/fixture/HexElement.hpp create mode 100644 lib/kokkos/example/fixture/Main.cpp create mode 100644 lib/kokkos/example/fixture/Makefile create mode 100644 lib/kokkos/example/fixture/TestFixture.cpp create mode 100644 lib/kokkos/example/fixture/TestFixture.hpp create mode 100644 lib/kokkos/example/global_2_local_ids/CMakeLists.txt create mode 100644 lib/kokkos/example/global_2_local_ids/G2L.hpp create mode 100644 lib/kokkos/example/global_2_local_ids/G2L_Main.cpp create mode 100644 lib/kokkos/example/global_2_local_ids/Makefile create mode 100644 lib/kokkos/example/grow_array/CMakeLists.txt create mode 100644 lib/kokkos/example/grow_array/Makefile create mode 100644 lib/kokkos/example/grow_array/grow_array.hpp create mode 100644 lib/kokkos/example/grow_array/main.cpp create mode 100644 lib/kokkos/example/ichol/Makefile create mode 100644 lib/kokkos/example/ichol/example/example_chol_performance_device.hpp create mode 100644 lib/kokkos/example/ichol/example/example_chol_performance_device_cuda.cpp create mode 100644 lib/kokkos/example/ichol/example/example_chol_performance_device_pthread.cpp create mode 100644 lib/kokkos/example/ichol/src/chol.hpp create mode 
100644 lib/kokkos/example/ichol/src/chol_u.hpp create mode 100644 lib/kokkos/example/ichol/src/chol_u_right_look_by_blocks.hpp create mode 100644 lib/kokkos/example/ichol/src/chol_u_unblocked_opt1.hpp create mode 100644 lib/kokkos/example/ichol/src/chol_u_unblocked_opt2.hpp create mode 100644 lib/kokkos/example/ichol/src/control.hpp create mode 100644 lib/kokkos/example/ichol/src/coo.hpp create mode 100644 lib/kokkos/example/ichol/src/crs_matrix_base.hpp create mode 100644 lib/kokkos/example/ichol/src/crs_matrix_base_import.hpp create mode 100644 lib/kokkos/example/ichol/src/crs_matrix_helper.hpp create mode 100644 lib/kokkos/example/ichol/src/crs_matrix_helper_impl.hpp create mode 100644 lib/kokkos/example/ichol/src/crs_matrix_view.hpp create mode 100644 lib/kokkos/example/ichol/src/crs_row_view.hpp create mode 100644 lib/kokkos/example/ichol/src/dot.hpp create mode 100644 lib/kokkos/example/ichol/src/gemm.hpp create mode 100644 lib/kokkos/example/ichol/src/gemm_ct_nt.hpp create mode 100644 lib/kokkos/example/ichol/src/gemm_ct_nt_for_factor_blocked.hpp create mode 100644 lib/kokkos/example/ichol/src/graph_helper_scotch.hpp create mode 100644 lib/kokkos/example/ichol/src/herk.hpp create mode 100644 lib/kokkos/example/ichol/src/herk_u_ct.hpp create mode 100644 lib/kokkos/example/ichol/src/herk_u_ct_for_factor_blocked.hpp create mode 100644 lib/kokkos/example/ichol/src/norm.hpp create mode 100644 lib/kokkos/example/ichol/src/partition.hpp create mode 100644 lib/kokkos/example/ichol/src/scale.hpp create mode 100644 lib/kokkos/example/ichol/src/symbolic_factor_helper.hpp create mode 100644 lib/kokkos/example/ichol/src/symbolic_task.hpp create mode 100644 lib/kokkos/example/ichol/src/task_factory.hpp create mode 100644 lib/kokkos/example/ichol/src/task_view.hpp create mode 100644 lib/kokkos/example/ichol/src/trsm.hpp create mode 100644 lib/kokkos/example/ichol/src/trsm_l_u_ct.hpp create mode 100644 lib/kokkos/example/ichol/src/trsm_l_u_ct_for_factor_blocked.hpp create 
mode 100644 lib/kokkos/example/ichol/src/util.cpp create mode 100644 lib/kokkos/example/ichol/src/util.hpp create mode 100644 lib/kokkos/example/md_skeleton/CMakeLists.txt create mode 100644 lib/kokkos/example/md_skeleton/Makefile create mode 100644 lib/kokkos/example/md_skeleton/README create mode 100644 lib/kokkos/example/md_skeleton/force.cpp create mode 100644 lib/kokkos/example/md_skeleton/main.cpp create mode 100644 lib/kokkos/example/md_skeleton/neighbor.cpp create mode 100644 lib/kokkos/example/md_skeleton/setup.cpp create mode 100644 lib/kokkos/example/md_skeleton/system.h create mode 100644 lib/kokkos/example/md_skeleton/types.h create mode 100644 lib/kokkos/example/multi_fem/BoxMeshFixture.hpp create mode 100644 lib/kokkos/example/multi_fem/BoxMeshPartition.cpp create mode 100644 lib/kokkos/example/multi_fem/BoxMeshPartition.hpp create mode 100644 lib/kokkos/example/multi_fem/CMakeLists.txt create mode 100644 lib/kokkos/example/multi_fem/Explicit.hpp create mode 100644 lib/kokkos/example/multi_fem/ExplicitFunctors.hpp create mode 100644 lib/kokkos/example/multi_fem/FEMesh.hpp create mode 100644 lib/kokkos/example/multi_fem/HexElement.hpp create mode 100644 lib/kokkos/example/multi_fem/HexExplicitFunctions.hpp create mode 100644 lib/kokkos/example/multi_fem/Implicit.hpp create mode 100644 lib/kokkos/example/multi_fem/ImplicitFunctors.hpp create mode 100644 lib/kokkos/example/multi_fem/LinAlgBLAS.hpp create mode 100644 lib/kokkos/example/multi_fem/Makefile create mode 100644 lib/kokkos/example/multi_fem/Nonlinear.hpp create mode 100644 lib/kokkos/example/multi_fem/NonlinearElement_Cuda.hpp create mode 100644 lib/kokkos/example/multi_fem/NonlinearFunctors.hpp create mode 100644 lib/kokkos/example/multi_fem/ParallelComm.hpp create mode 100644 lib/kokkos/example/multi_fem/ParallelDataMap.hpp create mode 100644 lib/kokkos/example/multi_fem/ParallelMachine.cpp create mode 100644 lib/kokkos/example/multi_fem/ParallelMachine.hpp create mode 100644 
lib/kokkos/example/multi_fem/SparseLinearSystem.hpp create mode 100644 lib/kokkos/example/multi_fem/SparseLinearSystemFill.hpp create mode 100644 lib/kokkos/example/multi_fem/SparseLinearSystem_Cuda.hpp create mode 100644 lib/kokkos/example/multi_fem/TestBoxMeshFixture.hpp create mode 100644 lib/kokkos/example/multi_fem/TestBoxMeshPartition.cpp create mode 100644 lib/kokkos/example/multi_fem/TestCuda.cpp create mode 100644 lib/kokkos/example/multi_fem/TestHost.cpp create mode 100644 lib/kokkos/example/multi_fem/TestHybridFEM.cpp create mode 100644 lib/kokkos/example/query_device/CMakeLists.txt create mode 100644 lib/kokkos/example/query_device/Makefile create mode 100644 lib/kokkos/example/query_device/query_device.cpp create mode 100644 lib/kokkos/example/sort_array/CMakeLists.txt create mode 100644 lib/kokkos/example/sort_array/Makefile create mode 100644 lib/kokkos/example/sort_array/main.cpp create mode 100644 lib/kokkos/example/sort_array/sort_array.hpp create mode 100644 lib/kokkos/example/tutorial/01_hello_world/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/01_hello_world/Makefile create mode 100644 lib/kokkos/example/tutorial/01_hello_world/hello_world.cpp create mode 100644 lib/kokkos/example/tutorial/01_hello_world_lambda/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/01_hello_world_lambda/Makefile create mode 100644 lib/kokkos/example/tutorial/01_hello_world_lambda/hello_world_lambda.cpp create mode 100644 lib/kokkos/example/tutorial/02_simple_reduce/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/02_simple_reduce/Makefile create mode 100644 lib/kokkos/example/tutorial/02_simple_reduce/simple_reduce.cpp create mode 100644 lib/kokkos/example/tutorial/02_simple_reduce_lambda/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/02_simple_reduce_lambda/Makefile create mode 100644 lib/kokkos/example/tutorial/02_simple_reduce_lambda/simple_reduce_lambda.cpp create mode 100644 
lib/kokkos/example/tutorial/03_simple_view/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/03_simple_view/Makefile create mode 100644 lib/kokkos/example/tutorial/03_simple_view/simple_view.cpp create mode 100644 lib/kokkos/example/tutorial/03_simple_view_lambda/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/03_simple_view_lambda/Makefile create mode 100644 lib/kokkos/example/tutorial/03_simple_view_lambda/simple_view_lambda.cpp create mode 100644 lib/kokkos/example/tutorial/04_simple_memoryspaces/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/04_simple_memoryspaces/Makefile create mode 100644 lib/kokkos/example/tutorial/04_simple_memoryspaces/simple_memoryspaces.cpp create mode 100644 lib/kokkos/example/tutorial/05_simple_atomics/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/05_simple_atomics/Makefile create mode 100644 lib/kokkos/example/tutorial/05_simple_atomics/simple_atomics.cpp create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/Makefile create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/01_data_layouts/data_layouts.cpp create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/Makefile create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/02_memory_traits/memory_traits.cpp create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/03_subviews/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/03_subviews/Makefile create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/03_subviews/subviews.cpp create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/Makefile create mode 100644 
lib/kokkos/example/tutorial/Advanced_Views/04_dualviews/dual_view.cpp create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/05_NVIDIA_UVM/uvm_example.cpp create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/06_AtomicViews/Makefile create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/overlapping_deepcopy.cpp create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Advanced_Views/Makefile create mode 100644 lib/kokkos/example/tutorial/Algorithms/01_random_numbers/Makefile create mode 100644 lib/kokkos/example/tutorial/Algorithms/01_random_numbers/random_numbers.cpp create mode 100644 lib/kokkos/example/tutorial/Algorithms/Makefile create mode 100644 lib/kokkos/example/tutorial/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams/thread_teams.cpp create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/thread_teams_lambda.cpp create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile create mode 100644 
lib/kokkos/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/nested_parallel_for.cpp create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/03_vectorization/vectorization.cpp create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/04_team_scan/team_scan.cpp create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/CMakeLists.txt create mode 100644 lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile create mode 100644 lib/kokkos/example/tutorial/Makefile create mode 100644 lib/kokkos/example/tutorial/README create mode 100755 lib/kokkos/generate_makefile.bash create mode 100644 lib/kokkos/tpls/gtest/gtest/LICENSE create mode 100644 lib/kokkos/tpls/gtest/gtest/README create mode 100644 lib/kokkos/tpls/gtest/gtest/gtest-all.cc create mode 120000 lib/kokkos/tpls/gtest/gtest/gtest-test-part.h create mode 100644 lib/kokkos/tpls/gtest/gtest/gtest.h diff --git a/lib/kokkos/.gitignore b/lib/kokkos/.gitignore new file mode 100644 index 0000000000..f9d16be155 --- /dev/null +++ b/lib/kokkos/.gitignore @@ -0,0 +1,8 @@ +# Standard ignores +*~ +*.pyc +\#*# +.#* +.*.swp +.cproject +.project diff --git a/lib/kokkos/CMakeLists.txt b/lib/kokkos/CMakeLists.txt new file mode 100644 index 0000000000..1219352f73 --- /dev/null +++ b/lib/kokkos/CMakeLists.txt @@ -0,0 +1,184 @@ + +IF(COMMAND TRIBITS_PACKAGE_DECL) + SET(KOKKOS_HAS_TRILINOS ON CACHE BOOL "") +ELSE() + SET(KOKKOS_HAS_TRILINOS OFF CACHE BOOL "") +ENDIF() + +IF(NOT KOKKOS_HAS_TRILINOS) + CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11 FATAL_ERROR) + INCLUDE(cmake/tribits.cmake) 
+ENDIF() + +# +# A) Forward declare the package so that certain options are also defined for +# subpackages +# + +TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS) + +#------------------------------------------------------------------------------ +# +# B) Define the common options for Kokkos first so they can be used by +# subpackages as well. +# + + + +# mfh 01 Aug 2016: See Issue #61: +# +# https://github.com/kokkos/kokkos/issues/61 +# +# Don't use TRIBITS_ADD_DEBUG_OPTION() here, because that defines +# HAVE_KOKKOS_DEBUG. We define KOKKOS_HAVE_DEBUG here instead, +# for compatibility with Kokkos' Makefile build system. + +TRIBITS_ADD_OPTION_AND_DEFINE( + ${PACKAGE_NAME}_ENABLE_DEBUG + ${PACKAGE_NAME_UC}_HAVE_DEBUG + "Enable run-time debug checks. These checks may be expensive, so they are disabled by default in a release build." + ${${PROJECT_NAME}_ENABLE_DEBUG} +) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_SIERRA_BUILD + KOKKOS_FOR_SIERRA + "Configure Kokkos for building within the Sierra build system." + OFF + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Cuda + KOKKOS_HAVE_CUDA + "Enable CUDA support in Kokkos." + "${TPL_ENABLE_CUDA}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Cuda_UVM + KOKKOS_USE_CUDA_UVM + "Enable CUDA Unified Virtual Memory support in Kokkos." + OFF + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Pthread + KOKKOS_HAVE_PTHREAD + "Enable Pthread support in Kokkos." + OFF + ) + +ASSERT_DEFINED(TPL_ENABLE_Pthread) +IF (Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread) + MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF). This is not allowed. Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.") +ENDIF () + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_OpenMP + KOKKOS_HAVE_OPENMP + "Enable OpenMP support in Kokkos."
+ "${${PROJECT_NAME}_ENABLE_OpenMP}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_QTHREAD + KOKKOS_HAVE_QTHREAD + "Enable QTHREAD support in Kokkos." + "${TPL_ENABLE_QTHREAD}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_CXX11 + KOKKOS_HAVE_CXX11 + "Enable C++11 support in Kokkos." + "${${PROJECT_NAME}_ENABLE_CXX11}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_HWLOC + KOKKOS_HAVE_HWLOC + "Enable HWLOC support in Kokkos." + "${TPL_ENABLE_HWLOC}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_MPI + KOKKOS_HAVE_MPI + "Enable MPI support in Kokkos." + "${TPL_ENABLE_MPI}" + ) + +# Set default value of Kokkos_ENABLE_Debug_Bounds_Check option +# +# CMake is case sensitive. The Kokkos_ENABLE_Debug_Bounds_Check +# option (defined below) is annoyingly not all caps, but we need to +# keep it that way for backwards compatibility. If users forget and +# try using an all-caps variable, then make it count by using the +# all-caps version as the default value of the original, not-all-caps +# option. Otherwise, the default value of this option comes from +# Kokkos_ENABLE_DEBUG (see Issue #367). + +ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_DEBUG) +IF(DEFINED Kokkos_ENABLE_DEBUG_BOUNDS_CHECK) + IF(Kokkos_ENABLE_DEBUG_BOUNDS_CHECK) + SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT ON) + ELSE() + SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT "${${PACKAGE_NAME}_ENABLE_DEBUG}") + ENDIF() +ELSE() + SET(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT "${${PACKAGE_NAME}_ENABLE_DEBUG}") +ENDIF() +ASSERT_DEFINED(Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Debug_Bounds_Check + KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK + "Enable Kokkos::View run-time bounds checking." + "${Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT}" + ) + +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Profiling + KOKKOS_ENABLE_PROFILING_INTERNAL + "Enable KokkosP profiling support for kernel data collections." 
+ "${TPL_ENABLE_DLlib}" + ) + +# placeholder for future device... +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_ENABLE_Winthread + KOKKOS_HAVE_WINTHREAD + "Enable Winthread support in Kokkos." + "${TPL_ENABLE_Winthread}" + ) + +# use new/old View +TRIBITS_ADD_OPTION_AND_DEFINE( + Kokkos_USING_DEPRECATED_VIEW + KOKKOS_USING_DEPRECATED_VIEW + "Choose whether to use the old, deprecated Kokkos::View" + OFF + ) + +#------------------------------------------------------------------------------ +# +# C) Process the subpackages for Kokkos +# + +TRIBITS_PROCESS_SUBPACKAGES() + +# +# D) If Kokkos itself is enabled, process the Kokkos package +# + +TRIBITS_PACKAGE_DEF() + +TRIBITS_EXCLUDE_AUTOTOOLS_FILES() + +TRIBITS_EXCLUDE_FILES( + classic/doc + classic/LinAlg/doc/CrsRefactorNotesMay2012 + ) + +TRIBITS_PACKAGE_POSTPROCESS() + diff --git a/lib/kokkos/Copyright.txt b/lib/kokkos/Copyright.txt new file mode 100644 index 0000000000..05980758fa --- /dev/null +++ b/lib/kokkos/Copyright.txt @@ -0,0 +1,40 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER diff --git a/lib/kokkos/HOW_TO_SNAPSHOT b/lib/kokkos/HOW_TO_SNAPSHOT new file mode 100644 index 0000000000..46bfb4167f --- /dev/null +++ b/lib/kokkos/HOW_TO_SNAPSHOT @@ -0,0 +1,73 @@ + +Developers of Kokkos (those who commit modifications to Kokkos) +must maintain the snapshot of Kokkos in the Trilinos repository. + +This file contains instructions for how to +snapshot Kokkos from github.com/kokkos to Trilinos. + +------------------------------------------------------------------------ +*** EVERYTHING GOES RIGHT WORKFLOW *** + +1) Given a 'git clone' of Kokkos and of Trilinos repositories. +1.1) Let ${KOKKOS} be the absolute path to the Kokkos clone. + This path *must* terminate with the directory name 'kokkos'; + e.g., ${HOME}/kokkos . +1.2) Let ${TRILINOS} be the absolute path to the Trilinos directory. 
+ +2) Given that the Kokkos build & test is clean and + changes are committed to the Kokkos clone. + +3) Snapshot the current commit in the Kokkos clone into the Trilinos clone. + This overwrites ${TRILINOS}/packages/kokkos with the content of ${KOKKOS}: + ${KOKKOS}/config/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages + +4) Verify the snapshot commit happened as expected + cd ${TRILINOS}/packages/kokkos + git log -1 --name-only + +5) Modify, build, and test Trilinos with the Kokkos snapshot. + +6) Given that the Trilinos build & test is clean and + changes are committed to the Trilinos clone. + +7) Attempt push to the Kokkos repository. + If push fails then you must 'remove the Kokkos snapshot' + from your Trilinos clone. + See below. + +8) Attempt to push to the Trilinos repository. + If updating for a failed push requires you to change Kokkos you must + 'remove the Kokkos snapshot' from your Trilinos clone. + See below. + +------------------------------------------------------------------------ +*** WHEN SOMETHING GOES WRONG AND YOU MUST *** +*** REMOVE THE KOKKOS SNAPSHOT FROM YOUR TRILINOS CLONE *** + +1) Query the Trilinos clone commit log. + git log --oneline + +2) Note the SHA1 of the commit to the Trilinos clone + immediately BEFORE the Kokkos snapshot commit. + Copy this for use in the next command. + +3) IF more than one outstanding commit then you can remove just the + Kokkos snapshot commit with 'git rebase -i'. Edit the rebase file. + Remove or comment out the Kokkos snapshot commit entry. + git rebase -i SHA1 + +4) IF the Kokkos snapshot commit is the one and only + outstanding commit then remove just that commit. + git reset --hard HEAD~1 + +------------------------------------------------------------------------ +*** REGARDING 'snapshot.py' TOOL *** + +The 'snapshot.py' tool is developed and maintained by the +Center for Computing Research (CCR) +Software Engineering, Maintenance, and Support (SEMS) team. 
+ +Contact Brent Perschbacher for questions. + +------------------------------------------------------------------------ + diff --git a/lib/kokkos/LICENSE b/lib/kokkos/LICENSE new file mode 100644 index 0000000000..05980758fa --- /dev/null +++ b/lib/kokkos/LICENSE @@ -0,0 +1,40 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos new file mode 100644 index 0000000000..c9b6cc464d --- /dev/null +++ b/lib/kokkos/Makefile.kokkos @@ -0,0 +1,480 @@ +# Default settings common options + +#LAMMPS specific settings: +KOKKOS_PATH=../../lib/kokkos +CXXFLAGS=$(CCFLAGS) + +#Options: OpenMP,Serial,Pthreads,Cuda +KOKKOS_DEVICES ?= "OpenMP" +#KOKKOS_DEVICES ?= "Pthreads" +#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv8,BGQ,Power7,Power8,KNL,BDW +KOKKOS_ARCH ?= "" +#Options: yes,no +KOKKOS_DEBUG ?= "no" +#Options: hwloc,librt,experimental_memkind +KOKKOS_USE_TPLS ?= "" +#Options: c++11 +KOKKOS_CXX_STANDARD ?= "c++11" +#Options: aggressive_vectorization,disable_profiling +KOKKOS_OPTIONS ?= "" + +#Default settings specific options +#Options: force_uvm,use_ldg,rdc,enable_lambda +KOKKOS_CUDA_OPTIONS ?= "" + +# Check for general settings + +KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l)) +KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l)) + +# Check for external libraries +KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l)) +KOKKOS_INTERNAL_USE_LIBRT := 
$(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "librt" | wc -l)) +KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l)) + +# Check for advanced settings +KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l)) +KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l)) +KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l)) +KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l)) +KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l)) +KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l)) + +# Check for Kokkos Host Execution Spaces one of which must be on + +KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l)) +KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l)) +KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l)) +KOKKOS_INTERNAL_USE_QTHREAD := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthread | wc -l)) + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0) +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0) + KOKKOS_INTERNAL_USE_SERIAL := 1 +endif +endif + +KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l) +KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l) +KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l) +KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l) +KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname | grep CYGWIN | wc -l) + +ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_INTERNAL_OPENMP_FLAG := -mp +else + ifeq 
($(KOKKOS_INTERNAL_COMPILER_XL), 1) + KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + # OpenMP is turned on by default in Cray compiler environment + KOKKOS_INTERNAL_OPENMP_FLAG := + else + KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp + endif + endif +endif + +ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_INTERNAL_CXX11_FLAG := --c++11 +else + ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + KOKKOS_INTERNAL_CXX11_FLAG := -std=c++11 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11 + else + KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11 + endif + endif +endif + +# Check for other Execution Spaces +KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l)) + +# Check for Kokkos Architecture settings + +#Intel based +KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l)) + +#NVIDIA based +NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper +KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_KEPLER37 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler37 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell50 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := 
$(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0) +KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \ + + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \ + + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \ + + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc)) +endif + +#ARM based +KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l)) + +#IBM based +KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_POWER7 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power7 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_POWER8 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Power8 | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BGQ)+$(KOKKOS_INTERNAL_USE_ARCH_POWER7)+$(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc)) + +#AMD based +KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l)) + +#Any AVX? 
+KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc )) +KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc )) +KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) + +# Decide what ISA level we are able to support +KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) +KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc )) +KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc )) + +#Incompatible flags? +KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc )) +KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc)) + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1) + $(error Defined Multiple Host architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) ) +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIGPU), 1) + $(error Defined Multiple GPU architectures: KOKKOS_ARCH=$(KOKKOS_ARCH) ) +endif + +#Generating the list of Flags + +KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src + +# No warnings: +KOKKOS_CXXFLAGS = +# INTEL and CLANG warnings: +#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized +# GCC warnings: +#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized -Wignored-qualifiers -Wempty-body -Wclobbered + +KOKKOS_LIBS = -lkokkos -ldl +KOKKOS_LDFLAGS = -L$(shell pwd) +KOKKOS_SRC = 
+KOKKOS_HEADERS = + +#Generating the KokkosCore_config.h file + +tmp := $(shell echo "/* ---------------------------------------------" > KokkosCore_config.tmp) +tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.tmp) +tmp := $(shell date >> KokkosCore_config.tmp) +tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp) + + +tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp) +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_PTHREAD 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1) + tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) + KOKKOS_CPPFLAGS += -I$(QTHREAD_PATH)/include + KOKKOS_LDFLAGS += -L$(QTHREAD_PATH)/lib + tmp := $(shell echo "\#define KOKKOS_HAVE_QTHREAD 1" >> KokkosCore_config.tmp ) +endif + +tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp) +ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG) + tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1) +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_CXXFLAGS += -G +endif + 
KOKKOS_CXXFLAGS += -g + KOKKOS_LDFLAGS += -g -ldl + tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1) + KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include + KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib + KOKKOS_LIBS += -lhwloc + tmp := $(shell echo "\#define KOKKOS_HAVE_HWLOC 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1) + tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp ) + KOKKOS_LIBS += -lrt +endif + +ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1) + KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include + KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib + KOKKOS_LIBS += -lmemkind + tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1) + tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING 0" >> KokkosCore_config.tmp ) +endif + +tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp) + +ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1) + tmp := $(shell echo "\#define KOKKOS_OPT_RANGE_AGGRESSIVE_VECTORIZATION 1" >> KokkosCore_config.tmp ) +endif + +tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp) + +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp ) +endif + +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> 
KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += --relocatable-device-code=true + KOKKOS_LDFLAGS += --relocatable-device-code=true +endif + +ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1) + tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -expt-extended-lambda +endif + +#Add Architecture flags + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + KOKKOS_CXXFLAGS += + KOKKOS_LDFLAGS += + else + KOKKOS_CXXFLAGS += -mavx + KOKKOS_LDFLAGS += -mavx + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8 + KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8 +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xCORE-AVX2 + KOKKOS_LDFLAGS += -xCORE-AVX2 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Assume that this is a really a GNU compiler + KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 + KOKKOS_LDFLAGS += -march=core-avx2 -mtune=core-avx2 + endif + endif + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp ) + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xMIC-AVX512 + KOKKOS_LDFLAGS += -xMIC-AVX512 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + + else + # Asssume that this is really a GNU compiler + KOKKOS_CXXFLAGS += -march=knl + KOKKOS_LDFLAGS += -march=knl + endif + endif + endif +endif + +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> 
KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -mmic + KOKKOS_LDFLAGS += -mmic +endif + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_30 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_32 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_35 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_37 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_50 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_52 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_53 +endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" 
>> KokkosCore_config.tmp ) + tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp ) + KOKKOS_CXXFLAGS += -arch=sm_61 +endif +endif + +KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h) +ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) +KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l)) +else +KOKKOS_INTERNAL_NEW_CONFIG := 1 +endif + +ifneq ($(KOKKOS_INTERNAL_NEW_CONFIG), 0) + tmp := $(shell cp KokkosCore_config.tmp KokkosCore_config.h) +endif + +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/*.hpp) +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.hpp) +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/*.hpp) +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.hpp) +KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/algorithms/src/*.hpp) + +KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/impl/*.cpp) +KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp) + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp) + KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 + KOKKOS_LIBS += -lcudart -lcuda +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + KOKKOS_LIBS += -lpthread + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) + KOKKOS_LIBS += -lqthread + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthread/*.hpp) +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp) + KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp) + ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG) + else + 
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) + endif + KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG) +endif + +#With Cygwin functions such as fdopen and fileno are not defined +#when strict ansi is enabled. strict ansi gets enabled with --std=c++11 +#though. So we hard undefine it here. Not sure if that has any bad side effects +#This is needed for gtest actually, not for Kokkos itself! +ifeq ($(KOKKOS_INTERNAL_OS_CYGWIN), 1) + KOKKOS_CXXFLAGS += -U__STRICT_ANSI__ +endif + +# Setting up dependencies + +KokkosCore_config.h: + +KOKKOS_CPP_DEPENDS := KokkosCore_config.h $(KOKKOS_HEADERS) + +KOKKOS_OBJ = $(KOKKOS_SRC:.cpp=.o) +KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ)) + +include $(KOKKOS_PATH)/Makefile.targets + +kokkos-clean: + rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a + +libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS) + ar cr libkokkos.a $(KOKKOS_OBJ_LINK) + ranlib libkokkos.a + +KOKKOS_LINK_DEPENDS=libkokkos.a diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets new file mode 100644 index 0000000000..86929ea0fe --- /dev/null +++ b/lib/kokkos/Makefile.targets @@ -0,0 +1,72 @@ +Kokkos_UnorderedMap_impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/containers/src/impl/Kokkos_UnorderedMap_impl.cpp +Kokkos_Core.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Core.cpp +Kokkos_CPUDiscovery.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_CPUDiscovery.cpp +Kokkos_Error.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_PATH)/core/src/impl/Kokkos_Error.cpp +Kokkos_ExecPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_ExecPolicy.cpp +Kokkos_HostSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostSpace.cpp +Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp +Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp +Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp +Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp +Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp +Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp +Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp +Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) 
$(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp +KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp +Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) +Kokkos_Cuda_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp +Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp +Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp +Kokkos_Cuda_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) +Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp +Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp 
+Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1) +Kokkos_QthreadExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_QthreadExec.cpp +Kokkos_Qthread_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Qthread/Kokkos_Qthread_TaskPolicy.cpp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) +Kokkos_OpenMPexec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp +Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp +endif + +Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp +Kokkos_HBWAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp + diff --git a/lib/kokkos/README b/lib/kokkos/README new file mode 100644 index 0000000000..b094578af6 --- /dev/null +++ b/lib/kokkos/README @@ -0,0 +1,152 @@ +Kokkos implements a programming model in C++ for writing performance portable +applications targeting all major HPC platforms. 
For that purpose it provides +abstractions for both parallel execution of code and data management. +Kokkos is designed to target complex node architectures with N-level memory +hierarchies and multiple types of execution resources. It currently can use +OpenMP, Pthreads and CUDA as backend programming models. + +The core developers of Kokkos are Carter Edwards and Christian Trott +at the Computer Science Research Institute of the Sandia National +Laboratories. + +The KokkosP interface and associated tools are developed by the Application +Performance Team and Kokkos core developers at Sandia National Laboratories. + +To learn more about Kokkos consider watching one of our presentations: +GTC 2015: + http://on-demand.gputechconf.com/gtc/2015/video/S5166.html + http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf + +A programming guide can be found under doc/Kokkos_PG.pdf. This is an initial version +and feedback is greatly appreciated. + +A separate repository with extensive tutorial material can be found under +https://github.com/kokkos/kokkos-tutorials. + +If you have a patch to contribute please feel free to issue a pull request against +the develop branch. For major contributions it is better to contact us first +for guidance. 
+ +For questions please send an email to +kokkos-users@software.sandia.gov + +For non-public questions send an email to +hcedwar(at)sandia.gov and crtrott(at)sandia.gov + +============================================================================ +====Requirements============================================================ +============================================================================ + +Primary tested compilers on X86 are: + GCC 4.7.2 + GCC 4.8.4 + GCC 4.9.2 + GCC 5.1.0 + Intel 14.0.4 + Intel 15.0.2 + Intel 16.0.1 + Clang 3.5.2 + Clang 3.6.1 + +Primary tested compilers on Power 8 are: + IBM XL 13.1.3 (OpenMP,Serial) + GCC 4.9.2 (OpenMP,Serial) + GCC 5.3.0 (OpenMP,Serial) + +Secondary tested compilers are: + CUDA 6.5 (with gcc 4.7.2) + CUDA 7.0 (with gcc 4.7.2) + CUDA 7.5 (with gcc 4.8.4) + +Other compilers working: + X86: + Intel 17.0.042 (the FENL example causes internal compiler error) + PGI 15.4 + Cygwin 2.1.0 64bit with gcc 4.9.3 + KNL: + Intel 16.2.181 (the FENL example causes internal compiler error) + Intel 17.0.042 (the FENL example causes internal compiler error) + +Known non-working combinations: + Power8: + GCC 6.1.0 + Pthreads backend + + +Primary tested compilers pass in release mode +with warnings as errors. They are also tested with a comprehensive set of +backend combinations (e.g. OpenMP, Pthreads, Serial, OpenMP+Serial, ...). +We are using the following set of flags: +GCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits + -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized +Intel: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized +Clang: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitialized + +Secondary compilers pass without -Werror. +Other compilers are tested occasionally, in particular when pushing from develop to +master branch, without -Werror and only for a select set of backends.
+ +============================================================================ +====Getting started========================================================= +============================================================================ + +In the 'example/tutorial' directory you will find step-by-step tutorial +examples which explain many of the features of Kokkos. They work with +simple Makefiles. To build with g++ and OpenMP simply type 'make openmp' +in the 'example/tutorial' directory. This will build all examples in the +subfolders. + +============================================================================ +====Running Unit Tests====================================================== +============================================================================ + +To run the unit tests, create a build directory and run the following commands: + +KOKKOS_PATH/generate_makefile.bash +make build-test +make test + +Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as +changing the device type for which to build. + +============================================================================ +====Install the library===================================================== +============================================================================ + +To install Kokkos as a library, create a build directory and run the following commands: + +KOKKOS_PATH/generate_makefile.bash --prefix=INSTALL_PATH +make lib +make install + +Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as +changing the device type for which to build. + +============================================================================ +====CMakeFiles============================================================== +============================================================================ + +The CMake files contained in this repository require Tribits and are used +for integration with Trilinos. They do not currently support a standalone +CMake build.
+ +=========================================================================== +====Kokkos and CUDA UVM==================================================== +=========================================================================== + +Kokkos supports UVM as a specific memory space called CudaUVMSpace. +Allocations made with that space are accessible from host and device. +You can tell Kokkos to use that as the default space for Cuda allocations. +In either case UVM comes with a number of restrictions: +(i) You can't access allocations on the host while a kernel is potentially +running. This will lead to segfaults. To avoid that you either need to +call Kokkos::Cuda::fence() (or just Kokkos::fence()) after kernels, or +you can set the environment variable CUDA_LAUNCH_BLOCKING=1. +(ii) In multi-socket, multi-GPU machines, UVM defaults to using +zero-copy allocations for technical reasons related to using multiple +GPUs from the same process. If an executable doesn't do that (e.g. each +MPI rank of an application uses a single GPU [can be the same GPU for +multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1. +This will enforce proper UVM allocations, but can lead to errors if +more than a single GPU is used by a single process.
+ diff --git a/lib/kokkos/algorithms/CMakeLists.txt b/lib/kokkos/algorithms/CMakeLists.txt new file mode 100644 index 0000000000..7853184a54 --- /dev/null +++ b/lib/kokkos/algorithms/CMakeLists.txt @@ -0,0 +1,10 @@ + + +TRIBITS_SUBPACKAGE(Algorithms) + +ADD_SUBDIRECTORY(src) + +TRIBITS_ADD_TEST_DIRECTORIES(unit_tests) +#TRIBITS_ADD_TEST_DIRECTORIES(performance_tests) + +TRIBITS_SUBPACKAGE_POSTPROCESS() diff --git a/lib/kokkos/algorithms/cmake/Dependencies.cmake b/lib/kokkos/algorithms/cmake/Dependencies.cmake new file mode 100644 index 0000000000..1d71d8af34 --- /dev/null +++ b/lib/kokkos/algorithms/cmake/Dependencies.cmake @@ -0,0 +1,5 @@ +TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( + LIB_REQUIRED_PACKAGES KokkosCore + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC + TEST_OPTIONAL_TPLS CUSPARSE + ) diff --git a/lib/kokkos/algorithms/cmake/KokkosAlgorithms_config.h.in b/lib/kokkos/algorithms/cmake/KokkosAlgorithms_config.h.in new file mode 100644 index 0000000000..67334b70f3 --- /dev/null +++ b/lib/kokkos/algorithms/cmake/KokkosAlgorithms_config.h.in @@ -0,0 +1,4 @@ +#ifndef KOKKOS_ALGORITHMS_CONFIG_H +#define KOKKOS_ALGORITHMS_CONFIG_H + +#endif diff --git a/lib/kokkos/algorithms/src/CMakeLists.txt b/lib/kokkos/algorithms/src/CMakeLists.txt new file mode 100644 index 0000000000..dfbf3323c2 --- /dev/null +++ b/lib/kokkos/algorithms/src/CMakeLists.txt @@ -0,0 +1,21 @@ + +TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +#----------------------------------------------------------------------------- + +FILE(GLOB HEADERS *.hpp) +FILE(GLOB SOURCES *.cpp) +LIST(APPEND HEADERS ${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME}_config.h) + +#----------------------------------------------------------------------------- + +TRIBITS_ADD_LIBRARY( + kokkosalgorithms + HEADERS ${HEADERS} + SOURCES ${SOURCES} + DEPLIBS + ) + diff --git a/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp 
b/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp new file mode 100644 index 0000000000..e69de29bb2 diff --git a/lib/kokkos/algorithms/src/Kokkos_Random.hpp b/lib/kokkos/algorithms/src/Kokkos_Random.hpp new file mode 100644 index 0000000000..d7c06dc14b --- /dev/null +++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp @@ -0,0 +1,1751 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_RANDOM_HPP +#define KOKKOS_RANDOM_HPP + +#include +#include +#include +#include +#include + +/// \file Kokkos_Random.hpp +/// \brief Pseudorandom number generators +/// +/// These generators are based on Vigna, Sebastiano (2014). "An +/// experimental exploration of Marsaglia's xorshift generators, +/// scrambled." See: http://arxiv.org/abs/1402.6246 + +namespace Kokkos { + + /*Template functions to get equidistributed random numbers from a generator for a specific Scalar type + + template + struct rand{ + + //Max value returned by draw(Generator& gen) + KOKKOS_INLINE_FUNCTION + static Scalar max(); + + //Returns a value between zero and max() + KOKKOS_INLINE_FUNCTION + static Scalar draw(Generator& gen); + + //Returns a value between zero and range + //Note: for floating point values range can be larger than max() + KOKKOS_INLINE_FUNCTION + static Scalar draw(Generator& gen, const Scalar& range); + + //Returns a value between start and end + KOKKOS_INLINE_FUNCTION + static Scalar draw(Generator& gen, const Scalar& start, const Scalar& end); + }; + + The random number generators themselves have two components: a state-pool and the actual generator. + A state-pool manages a number of generators, so that each active thread is able to grab its own.
+ This allows the generation of random numbers which are independent between threads. Note that, + in contrast to CuRand, none of the functions of the pool (or the generator) are collectives, + i.e. all functions can be called inside conditionals. + + template + class Pool { + public: + //The Kokkos device type + typedef Device device_type; + //The actual generator type + typedef Generator generator_type; + + //Default constructor: does not initialize a pool + Pool(); + + //Initializing constructor: calls init(seed,Device_Specific_Number); + Pool(unsigned int seed); + + //Initialize Pool with seed as a starting seed with a pool_size of num_states + //The Random_XorShift64 generator is used in serial to initialize all states, + //thus the initialization process is platform-independent and deterministic. + void init(unsigned int seed, int num_states); + + //Get a generator. This will lock one of the states, guaranteeing that each thread + //will have its private generator. Note: on Cuda getting a state involves atomics, + //and is thus not deterministic! + generator_type get_state(); + + //Give a state back to the pool. This unlocks the state, and writes the modified + //state of the generator back to the pool. + void free_state(generator_type gen); + + } + + template + class Generator { + public: + //The Kokkos device type + typedef DeviceType device_type; + + //Max return values of respective [X]rand[S]() functions + enum {MAX_URAND = 0xffffffffU}; + enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; + enum {MAX_RAND = static_cast(0xffffffffU/2)}; + enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; + + + //Init with a state and the idx with respect to pool.
Note: in serial the + //Generator can be used by just giving it the necessary state arguments + KOKKOS_INLINE_FUNCTION + Generator (STATE_ARGUMENTS, int state_idx = 0); + + //Draw an equidistributed uint32_t in the range [0,MAX_URAND] + KOKKOS_INLINE_FUNCTION + uint32_t urand(); + + //Draw an equidistributed uint64_t in the range [0,MAX_URAND64] + KOKKOS_INLINE_FUNCTION + uint64_t urand64(); + + //Draw an equidistributed uint32_t in the range [0,range) + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range); + + //Draw an equidistributed uint32_t in the range [start,end) + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end ); + + //Draw an equidistributed uint64_t in the range [0,range) + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range); + + //Draw an equidistributed uint64_t in the range [start,end) + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end ); + + //Draw an equidistributed int in the range [0,MAX_RAND] + KOKKOS_INLINE_FUNCTION + int rand(); + + //Draw an equidistributed int in the range [0,range) + KOKKOS_INLINE_FUNCTION + int rand(const int& range); + + //Draw an equidistributed int in the range [start,end) + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end ); + + //Draw an equidistributed int64_t in the range [0,MAX_RAND64] + KOKKOS_INLINE_FUNCTION + int64_t rand64(); + + //Draw an equidistributed int64_t in the range [0,range) + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range); + + //Draw an equidistributed int64_t in the range [start,end) + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end ); + + //Draw an equidistributed float in the range [0,1.0] + KOKKOS_INLINE_FUNCTION + float frand(); + + //Draw an equidistributed float in the range [0,range] + KOKKOS_INLINE_FUNCTION + float frand(const float& range); + + //Draw an equidistributed float in the range [start,end] + KOKKOS_INLINE_FUNCTION + float frand(const
float& start, const float& end ); + + //Draw an equidistributed double in the range [0,1.0] + KOKKOS_INLINE_FUNCTION + double drand(); + + //Draw an equidistributed double in the range [0,range] + KOKKOS_INLINE_FUNCTION + double drand(const double& range); + + //Draw an equidistributed double in the range [start,end] + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end ); + + //Draw a standard normal distributed double + KOKKOS_INLINE_FUNCTION + double normal() ; + + //Draw a normal distributed double with given mean and standard deviation + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev=1.0); + } + + //Additional Functions: + + //Fills view with random numbers in the range (0,range] + template + void fill_random(ViewType view, PoolType pool, ViewType::value_type range); + + //Fills view with random numbers in the range (start,end] + template + void fill_random(ViewType view, PoolType pool, + ViewType::value_type start, ViewType::value_type end); + +*/ + + template + struct rand; + + + template + struct rand { + + KOKKOS_INLINE_FUNCTION + static short max(){return 127;} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen) + {return short(gen.rand()&0x7f);} // 7-bit mask keeps the draw within [0,max()]; the old "&0xff+256" parsed as &(0xff+256) and reached 255 + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const char& range) + {return char(gen.rand(range));} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const char& start, const char& end) + {return char(gen.rand(start,end));} + + }; + + template + struct rand { + KOKKOS_INLINE_FUNCTION + static short max(){return 32767;} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen) + {return short(gen.rand()&0x7fff);} // 15-bit mask: same values as before, written without the &/+ precedence trap + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const short& range) + {return short(gen.rand(range));} + KOKKOS_INLINE_FUNCTION + static short draw(Generator& gen, const short& start, const short& end) + {return short(gen.rand(start,end));} + + }; + + template + struct rand { +
KOKKOS_INLINE_FUNCTION + static int max(){return Generator::MAX_RAND;} + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen) + {return gen.rand();} + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen, const int& range) + {return gen.rand(range);} + KOKKOS_INLINE_FUNCTION + static int draw(Generator& gen, const int& start, const int& end) + {return gen.rand(start,end);} + + }; + + template + struct rand { + KOKKOS_INLINE_FUNCTION + static unsigned int max () { + return Generator::MAX_URAND; + } + KOKKOS_INLINE_FUNCTION + static unsigned int draw (Generator& gen) { + return gen.urand (); + } + KOKKOS_INLINE_FUNCTION + static unsigned int draw(Generator& gen, const unsigned int& range) { + return gen.urand (range); + } + KOKKOS_INLINE_FUNCTION + static unsigned int + draw (Generator& gen, const unsigned int& start, const unsigned int& end) { + return gen.urand (start, end); + } + }; + + template + struct rand { + KOKKOS_INLINE_FUNCTION + static long max () { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (long) == 4 ? + static_cast (Generator::MAX_RAND) : + static_cast (Generator::MAX_RAND64); + } + KOKKOS_INLINE_FUNCTION + static long draw (Generator& gen) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (long) == 4 ? + static_cast (gen.rand ()) : + static_cast (gen.rand64 ()); + } + KOKKOS_INLINE_FUNCTION + static long draw (Generator& gen, const long& range) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (long) == 4 ? 
+ static_cast (gen.rand (static_cast (range))) : + static_cast (gen.rand64 (range)); + } + KOKKOS_INLINE_FUNCTION + static long draw (Generator& gen, const long& start, const long& end) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (long) == 4 ? + static_cast (gen.rand (static_cast (start), + static_cast (end))) : + static_cast (gen.rand64 (start, end)); + } + }; + + template + struct rand { + KOKKOS_INLINE_FUNCTION + static unsigned long max () { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (unsigned long) == 4 ? + static_cast (Generator::MAX_URAND) : + static_cast (Generator::MAX_URAND64); + } + KOKKOS_INLINE_FUNCTION + static unsigned long draw (Generator& gen) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (unsigned long) == 4 ? + static_cast (gen.urand ()) : + static_cast (gen.urand64 ()); + } + KOKKOS_INLINE_FUNCTION + static unsigned long draw(Generator& gen, const unsigned long& range) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (unsigned long) == 4 ? + static_cast (gen.urand (static_cast (range))) : + static_cast (gen.urand64 (range)); + } + KOKKOS_INLINE_FUNCTION + static unsigned long + draw (Generator& gen, const unsigned long& start, const unsigned long& end) { + // FIXME (mfh 26 Oct 2014) It would be better to select the + // return value at compile time, using something like enable_if. + return sizeof (unsigned long) == 4 ? 
+ static_cast (gen.urand (static_cast (start), + static_cast (end))) : + static_cast (gen.urand64 (start, end)); + } + }; + + // NOTE (mfh 26 oct 2014) This is a partial specialization for long + // long, a C99 / C++11 signed type which is guaranteed to be at + // least 64 bits. Do NOT write a partial specialization for + // int64_t!!! This is just a typedef! It could be either long or + // long long. We don't know which a priori, and I've seen both. + // The types long and long long are guaranteed to differ, so it's + // always safe to specialize for both. + template + struct rand { + KOKKOS_INLINE_FUNCTION + static long long max () { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return Generator::MAX_RAND64; + } + KOKKOS_INLINE_FUNCTION + static long long draw (Generator& gen) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64 (); + } + KOKKOS_INLINE_FUNCTION + static long long draw (Generator& gen, const long long& range) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64 (range); + } + KOKKOS_INLINE_FUNCTION + static long long draw (Generator& gen, const long long& start, const long long& end) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.rand64 (start, end); + } + }; + + // NOTE (mfh 26 oct 2014) This is a partial specialization for + // unsigned long long, a C99 / C++11 unsigned type which is + // guaranteed to be at least 64 bits. Do NOT write a partial + // specialization for uint64_t!!! This is just a typedef! It could + // be either unsigned long or unsigned long long. We don't know + // which a priori, and I've seen both. The types unsigned long and + // unsigned long long are guaranteed to differ, so it's always safe + // to specialize for both. 
+ template + struct rand { + KOKKOS_INLINE_FUNCTION + static unsigned long long max () { + // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits. + return Generator::MAX_URAND64; + } + KOKKOS_INLINE_FUNCTION + static unsigned long long draw (Generator& gen) { + // FIXME (mfh 26 Oct 2014) It's legal for unsigned long long to be > 64 bits. + return gen.urand64 (); + } + KOKKOS_INLINE_FUNCTION + static unsigned long long draw (Generator& gen, const unsigned long long& range) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.urand64 (range); + } + KOKKOS_INLINE_FUNCTION + static unsigned long long + draw (Generator& gen, const unsigned long long& start, const unsigned long long& end) { + // FIXME (mfh 26 Oct 2014) It's legal for long long to be > 64 bits. + return gen.urand64 (start, end); + } + }; + + template + struct rand { + KOKKOS_INLINE_FUNCTION + static float max(){return 1.0f;} + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen) + {return gen.frand();} + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen, const float& range) + {return gen.frand(range);} + KOKKOS_INLINE_FUNCTION + static float draw(Generator& gen, const float& start, const float& end) + {return gen.frand(start,end);} + + }; + + template + struct rand { + KOKKOS_INLINE_FUNCTION + static double max(){return 1.0;} + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen) + {return gen.drand();} + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen, const double& range) + {return gen.drand(range);} + KOKKOS_INLINE_FUNCTION + static double draw(Generator& gen, const double& start, const double& end) + {return gen.drand(start,end);} + + }; + + template + struct rand > { + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex max () { + return ::Kokkos::complex (1.0, 1.0); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex draw (Generator& gen) { + const float re = gen.frand (); + const float im = gen.frand (); + 
return ::Kokkos::complex (re, im); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex draw (Generator& gen, const ::Kokkos::complex& range) { + const float re = gen.frand (real (range)); + const float im = gen.frand (imag (range)); + return ::Kokkos::complex (re, im); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex draw (Generator& gen, const ::Kokkos::complex& start, const ::Kokkos::complex& end) { + const float re = gen.frand (real (start), real (end)); + const float im = gen.frand (imag (start), imag (end)); + return ::Kokkos::complex (re, im); + } + }; + + template + struct rand > { + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex max () { + return ::Kokkos::complex (1.0, 1.0); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex draw (Generator& gen) { + const double re = gen.drand (); + const double im = gen.drand (); + return ::Kokkos::complex (re, im); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex draw (Generator& gen, const ::Kokkos::complex& range) { + const double re = gen.drand (real (range)); + const double im = gen.drand (imag (range)); + return ::Kokkos::complex (re, im); + } + KOKKOS_INLINE_FUNCTION + static ::Kokkos::complex draw (Generator& gen, const ::Kokkos::complex& start, const ::Kokkos::complex& end) { + const double re = gen.drand (real (start), real (end)); + const double im = gen.drand (imag (start), imag (end)); + return ::Kokkos::complex (re, im); + } + }; + + template + class Random_XorShift64_Pool; + + template + class Random_XorShift64 { + private: + uint64_t state_; + const int state_idx_; + friend class Random_XorShift64_Pool; + public: + + typedef DeviceType device_type; + + enum {MAX_URAND = 0xffffffffU}; + enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; + enum {MAX_RAND = static_cast(0xffffffff/2)}; + enum {MAX_RAND64 = static_cast(0xffffffffffffffffLL/2-1)}; + + KOKKOS_INLINE_FUNCTION + Random_XorShift64 (uint64_t state, int state_idx = 0) + : state_(state),state_idx_(state_idx){} + + 
KOKKOS_INLINE_FUNCTION + uint32_t urand() { + state_ ^= state_ >> 12; + state_ ^= state_ << 25; + state_ ^= state_ >> 27; + + uint64_t tmp = state_ * 2685821657736338717ULL; + tmp = tmp>>16; + return static_cast(tmp&MAX_URAND); + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + state_ ^= state_ >> 12; + state_ ^= state_ << 25; + state_ ^= state_ >> 27; + return (state_ * 2685821657736338717ULL) - 1; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND/range)*range; + uint32_t tmp = urand(); + while(tmp>=max_val) + tmp = urand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end ) { + return urand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64/range)*range; + uint64_t tmp = urand64(); + while(tmp>=max_val) + tmp = urand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end ) { + return urand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int rand() { + return static_cast(urand()/2); + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND/range)*range; + int tmp = rand(); + while(tmp>=max_val) + tmp = rand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end ) { + return rand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64() { + return static_cast(urand64()/2); + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64/range)*range; + int64_t tmp = rand64(); + while(tmp>=max_val) + tmp = rand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end ) { + return rand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + float frand() { + return 1.0f * urand64()/MAX_URAND64; + } + + 
KOKKOS_INLINE_FUNCTION + float frand(const float& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end ) { + return frand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + double drand() { + return 1.0 * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end ) { + return drand(end-start)+start; + } + + //Marsaglia polar method for drawing a standard normal distributed random number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while(S>=1.0 || S==0.0) { // reject points outside the unit disc, and the origin (log(0) would yield NaN) + U = 2.0*drand() - 1.0; // the polar method samples the square [-1,1]x[-1,1]; [0,1] only gave non-negative deviates + const double V = 2.0*drand() - 1.0; + S = U*U+V*V; + } + return U*sqrt(-2.0*log(S)/S); + } + + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev=1.0) { + return mean + normal()*std_dev; + } + + }; + + template + class Random_XorShift64_Pool { + private: + typedef View lock_type; + typedef View state_data_type; + lock_type locks_; + state_data_type state_; + int num_states_; + + public: + typedef Random_XorShift64 generator_type; + typedef DeviceType device_type; + + Random_XorShift64_Pool() { + num_states_ = 0; + } + Random_XorShift64_Pool(uint64_t seed) { + num_states_ = 0; + init(seed,DeviceType::max_hardware_threads()); + } + + Random_XorShift64_Pool(const Random_XorShift64_Pool& src): + locks_(src.locks_), + state_(src.state_), + num_states_(src.num_states_) + {} + + Random_XorShift64_Pool& operator = (const Random_XorShift64_Pool& src) { // returns *this by reference instead of a by-value copy + locks_ = src.locks_; + state_ = src.state_; + num_states_ = src.num_states_; + return *this; + } + + void init(uint64_t seed, int num_states) { + num_states_ = num_states; + + locks_ = lock_type("Kokkos::Random_XorShift64::locks",num_states_); + state_ = state_data_type("Kokkos::Random_XorShift64::state",num_states_); + + typename state_data_type::HostMirror h_state =
create_mirror_view(state_); + typename lock_type::HostMirror h_lock = create_mirror_view(locks_); + + // Execute on the HostMirror's default execution space. + Random_XorShift64 gen(seed,0); + for(int i = 0; i < 17; i++) + gen.rand(); + for(int i = 0; i < num_states_; i++) { + int n1 = gen.rand(); + int n2 = gen.rand(); + int n3 = gen.rand(); + int n4 = gen.rand(); + h_state(i) = (((static_cast(n1)) & 0xffff)<<00) | + (((static_cast(n2)) & 0xffff)<<16) | + (((static_cast(n3)) & 0xffff)<<32) | + (((static_cast(n4)) & 0xffff)<<48); + h_lock(i) = 0; + } + deep_copy(state_,h_state); + deep_copy(locks_,h_lock); + } + + KOKKOS_INLINE_FUNCTION + Random_XorShift64 get_state() const { + const int i = DeviceType::hardware_thread_id();; + return Random_XorShift64(state_(i),i); + } + + KOKKOS_INLINE_FUNCTION + void free_state(const Random_XorShift64& state) const { + state_(state.state_idx_) = state.state_; + } + }; + + + template + class Random_XorShift1024_Pool; + + template + class Random_XorShift1024 { + private: + int p_; + const int state_idx_; + uint64_t state_[16]; + friend class Random_XorShift1024_Pool; + public: + + typedef Random_XorShift1024_Pool pool_type; + typedef DeviceType device_type; + + enum {MAX_URAND = 0xffffffffU}; + enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; + enum {MAX_RAND = static_cast(0xffffffffU/2)}; + enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; + + KOKKOS_INLINE_FUNCTION + Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0): + p_(p),state_idx_(state_idx){ + for(int i=0 ; i<16; i++) + state_[i] = state(state_idx,i); + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand() { + uint64_t state_0 = state_[ p_ ]; + uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + uint64_t tmp = ( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; + tmp = tmp>>16; + return static_cast(tmp&MAX_URAND); + } + + 
KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + uint64_t state_0 = state_[ p_ ]; + uint64_t state_1 = state_[ p_ = ( p_ + 1 ) & 15 ]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + return (( state_[ p_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND/range)*range; + uint32_t tmp = urand(); + while(tmp>=max_val) + tmp = urand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end ) { + return urand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64/range)*range; + uint64_t tmp = urand64(); + while(tmp>=max_val) + tmp = urand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end ) { + return urand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int rand() { + return static_cast(urand()/2); + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND/range)*range; + int tmp = rand(); + while(tmp>=max_val) + tmp = rand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end ) { + return rand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64() { + return static_cast(urand64()/2); + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64/range)*range; + int64_t tmp = rand64(); + while(tmp>=max_val) + tmp = rand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end ) { + return rand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + float frand() { + return 1.0f * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& range) { + return range * urand64()/MAX_URAND64; + } + + 
KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end ) { + return frand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + double drand() { + return 1.0 * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end ) { + return frand(end-start)+start; + } + + //Marsaglia polar method for drawing a standard normal distributed random number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while(S>=1.0) { + U = drand(); + const double V = drand(); + S = U*U+V*V; + } + return U*sqrt(-2.0*log(S)/S); + } + + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev=1.0) { + return mean + normal()*std_dev; + } + }; + + + template + class Random_XorShift1024_Pool { + private: + typedef View int_view_type; + typedef View state_data_type; + + int_view_type locks_; + state_data_type state_; + int_view_type p_; + int num_states_; + friend class Random_XorShift1024; + + public: + typedef Random_XorShift1024 generator_type; + + typedef DeviceType device_type; + + Random_XorShift1024_Pool() { + num_states_ = 0; + } + + inline + Random_XorShift1024_Pool(uint64_t seed){ + num_states_ = 0; + init(seed,DeviceType::max_hardware_threads()); + } + + Random_XorShift1024_Pool(const Random_XorShift1024_Pool& src): + locks_(src.locks_), + state_(src.state_), + p_(src.p_), + num_states_(src.num_states_) + {} + + Random_XorShift1024_Pool operator = (const Random_XorShift1024_Pool& src) { + locks_ = src.locks_; + state_ = src.state_; + p_ = src.p_; + num_states_ = src.num_states_; + return *this; + } + + inline + void init(uint64_t seed, int num_states) { + num_states_ = num_states; + + locks_ = int_view_type("Kokkos::Random_XorShift1024::locks",num_states_); + state_ = state_data_type("Kokkos::Random_XorShift1024::state",num_states_); + p_ = 
int_view_type("Kokkos::Random_XorShift1024::p",num_states_); + + typename state_data_type::HostMirror h_state = create_mirror_view(state_); + typename int_view_type::HostMirror h_lock = create_mirror_view(locks_); + typename int_view_type::HostMirror h_p = create_mirror_view(p_); + + // Execute on the HostMirror's default execution space. + Random_XorShift64 gen(seed,0); + for(int i = 0; i < 17; i++) + gen.rand(); + for(int i = 0; i < num_states_; i++) { + for(int j = 0; j < 16 ; j++) { + int n1 = gen.rand(); + int n2 = gen.rand(); + int n3 = gen.rand(); + int n4 = gen.rand(); + h_state(i,j) = (((static_cast(n1)) & 0xffff)<<00) | + (((static_cast(n2)) & 0xffff)<<16) | + (((static_cast(n3)) & 0xffff)<<32) | + (((static_cast(n4)) & 0xffff)<<48); + } + h_p(i) = 0; + h_lock(i) = 0; + } + deep_copy(state_,h_state); + deep_copy(locks_,h_lock); + } + + KOKKOS_INLINE_FUNCTION + Random_XorShift1024 get_state() const { + const int i = DeviceType::hardware_thread_id(); + return Random_XorShift1024(state_,p_(i),i); + }; + + KOKKOS_INLINE_FUNCTION + void free_state(const Random_XorShift1024& state) const { + for(int i = 0; i<16; i++) + state_(state.state_idx_,i) = state.state_[i]; + p_(state.state_idx_) = state.p_; + } + }; + +#if defined(KOKKOS_HAVE_CUDA) && defined(__CUDACC__) + + template<> + class Random_XorShift1024 { + private: + int p_; + const int state_idx_; + uint64_t* state_; + const int stride_; + friend class Random_XorShift1024_Pool; + public: + + typedef Kokkos::Cuda device_type; + typedef Random_XorShift1024_Pool pool_type; + + enum {MAX_URAND = 0xffffffffU}; + enum {MAX_URAND64 = 0xffffffffffffffffULL-1}; + enum {MAX_RAND = static_cast(0xffffffffU/2)}; + enum {MAX_RAND64 = static_cast(0xffffffffffffffffULL/2-1)}; + + KOKKOS_INLINE_FUNCTION + Random_XorShift1024 (const typename pool_type::state_data_type& state, int p, int state_idx = 0): + p_(p),state_idx_(state_idx),state_(&state(state_idx,0)),stride_(state.stride_1()){ + } + + KOKKOS_INLINE_FUNCTION + 
uint32_t urand() { + uint64_t state_0 = state_[ p_ * stride_ ]; + uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + uint64_t tmp = ( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981ULL; + tmp = tmp>>16; + return static_cast(tmp&MAX_URAND); + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64() { + uint64_t state_0 = state_[ p_ * stride_ ]; + uint64_t state_1 = state_[ (p_ = ( p_ + 1 ) & 15) * stride_ ]; + state_1 ^= state_1 << 31; + state_1 ^= state_1 >> 11; + state_0 ^= state_0 >> 30; + return (( state_[ p_ * stride_ ] = state_0 ^ state_1 ) * 1181783497276652981LL) - 1; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& range) { + const uint32_t max_val = (MAX_URAND/range)*range; + uint32_t tmp = urand(); + while(tmp>=max_val) + urand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint32_t urand(const uint32_t& start, const uint32_t& end ) { + return urand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& range) { + const uint64_t max_val = (MAX_URAND64/range)*range; + uint64_t tmp = urand64(); + while(tmp>=max_val) + urand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + uint64_t urand64(const uint64_t& start, const uint64_t& end ) { + return urand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int rand() { + return static_cast(urand()/2); + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& range) { + const int max_val = (MAX_RAND/range)*range; + int tmp = rand(); + while(tmp>=max_val) + rand(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int rand(const int& start, const int& end ) { + return rand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64() { + return static_cast(urand64()/2); + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& range) { + const int64_t max_val = (MAX_RAND64/range)*range; + int64_t tmp = rand64(); + while(tmp>=max_val) + 
rand64(); + return tmp%range; + } + + KOKKOS_INLINE_FUNCTION + int64_t rand64(const int64_t& start, const int64_t& end ) { + return rand64(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + float frand() { + return 1.0f * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + float frand(const float& start, const float& end ) { + return frand(end-start)+start; + } + + KOKKOS_INLINE_FUNCTION + double drand() { + return 1.0 * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& range) { + return range * urand64()/MAX_URAND64; + } + + KOKKOS_INLINE_FUNCTION + double drand(const double& start, const double& end ) { + return frand(end-start)+start; + } + + //Marsaglia polar method for drawing a standard normal distributed random number + KOKKOS_INLINE_FUNCTION + double normal() { + double S = 2.0; + double U; + while(S>=1.0) { + U = drand(); + const double V = drand(); + S = U*U+V*V; + } + return U*sqrt(-2.0*log(S)/S); + } + + KOKKOS_INLINE_FUNCTION + double normal(const double& mean, const double& std_dev=1.0) { + return mean + normal()*std_dev; + } + }; + +template<> +inline +Random_XorShift64_Pool::Random_XorShift64_Pool(uint64_t seed) { + num_states_ = 0; + init(seed,4*32768); +} + +template<> +KOKKOS_INLINE_FUNCTION +Random_XorShift64 Random_XorShift64_Pool::get_state() const { +#ifdef __CUDA_ARCH__ + const int i_offset = (threadIdx.x*blockDim.y + threadIdx.y)*blockDim.z+threadIdx.z; + int i = (((blockIdx.x*gridDim.y+blockIdx.y)*gridDim.z + blockIdx.z) * + blockDim.x*blockDim.y*blockDim.z + i_offset)%num_states_; + while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { + i+=blockDim.x*blockDim.y*blockDim.z; + if(i>=num_states_) {i = i_offset;} + } + + return Random_XorShift64(state_(i),i); +#else + return Random_XorShift64(state_(0),0); +#endif +} + +template<> +KOKKOS_INLINE_FUNCTION +void 
Random_XorShift64_Pool::free_state(const Random_XorShift64 &state) const { +#ifdef __CUDA_ARCH__ + state_(state.state_idx_) = state.state_; + locks_(state.state_idx_) = 0; + return; +#endif +} + + +template<> +inline +Random_XorShift1024_Pool::Random_XorShift1024_Pool(uint64_t seed) { + num_states_ = 0; + init(seed,4*32768); +} + +template<> +KOKKOS_INLINE_FUNCTION +Random_XorShift1024 Random_XorShift1024_Pool::get_state() const { +#ifdef __CUDA_ARCH__ + const int i_offset = (threadIdx.x*blockDim.y + threadIdx.y)*blockDim.z+threadIdx.z; + int i = (((blockIdx.x*gridDim.y+blockIdx.y)*gridDim.z + blockIdx.z) * + blockDim.x*blockDim.y*blockDim.z + i_offset)%num_states_; + while(Kokkos::atomic_compare_exchange(&locks_(i),0,1)) { + i+=blockDim.x*blockDim.y*blockDim.z; + if(i>=num_states_) {i = i_offset;} + } + + return Random_XorShift1024(state_, p_(i), i); +#else + return Random_XorShift1024(state_, p_(0), 0); +#endif +} + +template<> +KOKKOS_INLINE_FUNCTION +void Random_XorShift1024_Pool::free_state(const Random_XorShift1024 &state) const { +#ifdef __CUDA_ARCH__ + for(int i=0; i<16; i++) + state_(state.state_idx_,i) = state.state_[i]; + locks_(state.state_idx_) = 0; + return; +#endif +} + + +#endif + + +namespace Impl { + +template +struct fill_random_functor_range; +template +struct fill_random_functor_begin_end; + +template +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const IndexType& i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) + a(idx) = Rand::draw(gen,range); + } + rand_pool.free_state(gen); + } +}; + +template +struct 
fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + a(idx,k) = Rand::draw(gen,range); + } + } + rand_pool.free_state(gen); + } +}; + + +template +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + a(idx,k,l) = Rand::draw(gen,range); + } + } + rand_pool.free_state(gen); + } +}; + +template +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType 
l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + a(idx,k,l,m) = Rand::draw(gen,range); + } + } + rand_pool.free_state(gen); + } +}; + +template +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) + a(idx,k,l,m,n) = Rand::draw(gen,range); + } + } + rand_pool.free_state(gen); + } +}; + +template +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) + for(IndexType o=0;o(a.dimension_5());o++) + a(idx,k,l,m,n,o) = Rand::draw(gen,range); + } + } + rand_pool.free_state(gen); + } +}; + +template +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename 
ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) + for(IndexType o=0;o(a.dimension_5());o++) + for(IndexType p=0;p(a.dimension_6());p++) + a(idx,k,l,m,n,o,p) = Rand::draw(gen,range); + } + } + rand_pool.free_state(gen); + } +}; + +template +struct fill_random_functor_range{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type range; + + typedef rand Rand; + + fill_random_functor_range(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type range_): + a(a_),rand_pool(rand_pool_),range(range_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) + for(IndexType o=0;o(a.dimension_5());o++) + for(IndexType p=0;p(a.dimension_6());p++) + for(IndexType q=0;q(a.dimension_7());q++) + a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,range); + } + } + rand_pool.free_state(gen); + } +}; +template +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + 
typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) + a(idx) = Rand::draw(gen,begin,end); + } + rand_pool.free_state(gen); + } +}; + +template +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + a(idx,k) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + + +template +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + a(idx,k,l) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + +template +struct fill_random_functor_begin_end{ + typedef 
typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + a(idx,k,l,m) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + +template +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())){ + for(IndexType l=0;l(a.dimension_1());l++) + for(IndexType m=0;m(a.dimension_2());m++) + for(IndexType n=0;n(a.dimension_3());n++) + for(IndexType o=0;o(a.dimension_4());o++) + a(idx,l,m,n,o) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + +template +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, 
typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) + for(IndexType o=0;o(a.dimension_5());o++) + a(idx,k,l,m,n,o) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + + +template +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) + for(IndexType o=0;o(a.dimension_5());o++) + for(IndexType p=0;p(a.dimension_6());p++) + a(idx,k,l,m,n,o,p) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + +template +struct fill_random_functor_begin_end{ + typedef typename ViewType::execution_space execution_space; + ViewType a; + RandomPool rand_pool; + typename ViewType::const_value_type begin,end; + + typedef rand Rand; + + fill_random_functor_begin_end(ViewType a_, RandomPool rand_pool_, + typename ViewType::const_value_type begin_, typename ViewType::const_value_type end_): + 
a(a_),rand_pool(rand_pool_),begin(begin_),end(end_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (IndexType i) const { + typename RandomPool::generator_type gen = rand_pool.get_state(); + for(IndexType j=0;j(a.dimension_0())) { + for(IndexType k=0;k(a.dimension_1());k++) + for(IndexType l=0;l(a.dimension_2());l++) + for(IndexType m=0;m(a.dimension_3());m++) + for(IndexType n=0;n(a.dimension_4());n++) + for(IndexType o=0;o(a.dimension_5());o++) + for(IndexType p=0;p(a.dimension_6());p++) + for(IndexType q=0;q(a.dimension_7());q++) + a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,begin,end); + } + } + rand_pool.free_state(gen); + } +}; + +} + +template +void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type range) { + int64_t LDA = a.dimension_0(); + if(LDA>0) + parallel_for((LDA+127)/128,Impl::fill_random_functor_range(a,g,range)); +} + +template +void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin,typename ViewType::const_value_type end ) { + int64_t LDA = a.dimension_0(); + if(LDA>0) + parallel_for((LDA+127)/128,Impl::fill_random_functor_begin_end(a,g,begin,end)); +} +} + +#endif diff --git a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp new file mode 100644 index 0000000000..6123ce978c --- /dev/null +++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp @@ -0,0 +1,496 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + + +#ifndef KOKKOS_SORT_HPP_ +#define KOKKOS_SORT_HPP_ + +#include + +#include + +namespace Kokkos { + + namespace SortImpl { + + template + struct CopyOp; + + template + struct CopyOp { + template + KOKKOS_INLINE_FUNCTION + static void copy(DstType& dst, size_t i_dst, + SrcType& src, size_t i_src ) { + dst(i_dst) = src(i_src); + } + }; + + template + struct CopyOp { + template + KOKKOS_INLINE_FUNCTION + static void copy(DstType& dst, size_t i_dst, + SrcType& src, size_t i_src ) { + for(int j = 0;j< (int) dst.dimension_1(); j++) + dst(i_dst,j) = src(i_src,j); + } + }; + + template + struct CopyOp { + template + KOKKOS_INLINE_FUNCTION + static void copy(DstType& dst, size_t i_dst, + SrcType& src, size_t i_src ) { + for(int j = 0; j +class BinSort { + + +public: + template + struct bin_sort_sort_functor { + typedef ExecutionSpace execution_space; + typedef typename ValuesViewType::non_const_type values_view_type; + typedef typename ValuesViewType::const_type const_values_view_type; + Kokkos::View > values; + values_view_type sorted_values; + typename PermuteViewType::const_type sort_order; + bin_sort_sort_functor(const_values_view_type values_, values_view_type sorted_values_, PermuteViewType sort_order_): + values(values_),sorted_values(sorted_values_),sort_order(sort_order_) {} + + KOKKOS_INLINE_FUNCTION + void operator() (const int& i) const { + //printf("Sort: %i %i\n",i,sort_order(i)); + CopyOp::copy(sorted_values,i,values,sort_order(i)); + } + }; + + typedef ExecutionSpace execution_space; + typedef BinSortOp bin_op_type; + + struct bin_count_tag {}; + struct bin_offset_tag {}; + struct bin_binning_tag {}; + struct bin_sort_bins_tag {}; + +public: + typedef SizeType size_type; + typedef size_type value_type; + + typedef Kokkos::View offset_type; + typedef Kokkos::View bin_count_type; + + + typedef Kokkos::View const_key_view_type; 
+ typedef Kokkos::View > const_rnd_key_view_type; + + typedef typename KeyViewType::non_const_value_type non_const_key_scalar; + typedef typename KeyViewType::const_value_type const_key_scalar; + +private: + const_key_view_type keys; + const_rnd_key_view_type keys_rnd; + +public: + BinSortOp bin_op; + + offset_type bin_offsets; + + Kokkos::View > bin_count_atomic; + bin_count_type bin_count_const; + + offset_type sort_order; + + bool sort_within_bins; + +public: + + // Constructor: takes the keys, the binning_operator and optionally whether to sort within bins (default false) + BinSort(const_key_view_type keys_, BinSortOp bin_op_, + bool sort_within_bins_ = false) + :keys(keys_),keys_rnd(keys_), bin_op(bin_op_) { + + bin_count_atomic = Kokkos::View("Kokkos::SortImpl::BinSortFunctor::bin_count",bin_op.max_bins()); + bin_count_const = bin_count_atomic; + bin_offsets = offset_type("Kokkos::SortImpl::BinSortFunctor::bin_offsets",bin_op.max_bins()); + sort_order = offset_type("PermutationVector",keys.dimension_0()); + sort_within_bins = sort_within_bins_; + } + + // Create the permutation vector, the bin_offset array and the bin_count array. 
Can be called again if keys changed + void create_permute_vector() { + Kokkos::parallel_for (Kokkos::RangePolicy (0,keys.dimension_0()),*this); + Kokkos::parallel_scan(Kokkos::RangePolicy (0,bin_op.max_bins()) ,*this); + + Kokkos::deep_copy(bin_count_atomic,0); + Kokkos::parallel_for (Kokkos::RangePolicy (0,keys.dimension_0()),*this); + + if(sort_within_bins) + Kokkos::parallel_for (Kokkos::RangePolicy(0,bin_op.max_bins()) ,*this); + } + + // Sort a view with respect ot the first dimension using the permutation array + template + void sort(ValuesViewType values) { + ValuesViewType sorted_values = ValuesViewType("Copy", + values.dimension_0(), + values.dimension_1(), + values.dimension_2(), + values.dimension_3(), + values.dimension_4(), + values.dimension_5(), + values.dimension_6(), + values.dimension_7()); + + parallel_for(values.dimension_0(), + bin_sort_sort_functor >(values,sorted_values,sort_order)); + + deep_copy(values,sorted_values); + } + + // Get the permutation vector + KOKKOS_INLINE_FUNCTION + offset_type get_permute_vector() const { return sort_order;} + + // Get the start offsets for each bin + KOKKOS_INLINE_FUNCTION + offset_type get_bin_offsets() const { return bin_offsets;} + + // Get the count for each bin + KOKKOS_INLINE_FUNCTION + bin_count_type get_bin_count() const {return bin_count_const;} + +public: + KOKKOS_INLINE_FUNCTION + void operator() (const bin_count_tag& tag, const int& i) const { + bin_count_atomic(bin_op.bin(keys,i))++; + } + + KOKKOS_INLINE_FUNCTION + void operator() (const bin_offset_tag& tag, const int& i, value_type& offset, const bool& final) const { + if(final) { + bin_offsets(i) = offset; + } + offset+=bin_count_const(i); + } + + KOKKOS_INLINE_FUNCTION + void operator() (const bin_binning_tag& tag, const int& i) const { + const int bin = bin_op.bin(keys,i); + const int count = bin_count_atomic(bin)++; + + sort_order(bin_offsets(bin) + count) = i; + } + + KOKKOS_INLINE_FUNCTION + void operator() (const bin_sort_bins_tag& 
tag, const int&i ) const { + bool sorted = false; + int upper_bound = bin_offsets(i)+bin_count_const(i); + while(!sorted) { + sorted = true; + int old_idx = sort_order(bin_offsets(i)); + int new_idx; + for(int k=bin_offsets(i)+1; k +struct DefaultBinOp1D { + const int max_bins_; + const double mul_; + typename KeyViewType::const_value_type range_; + typename KeyViewType::const_value_type min_; + + //Construct BinOp with number of bins, minimum value and maxuimum value + DefaultBinOp1D(int max_bins__, typename KeyViewType::const_value_type min, + typename KeyViewType::const_value_type max ) + :max_bins_(max_bins__+1),mul_(1.0*max_bins__/(max-min)),range_(max-min),min_(min) {} + + //Determine bin index from key value + template + KOKKOS_INLINE_FUNCTION + int bin(ViewType& keys, const int& i) const { + return int(mul_*(keys(i)-min_)); + } + + //Return maximum bin index + 1 + KOKKOS_INLINE_FUNCTION + int max_bins() const { + return max_bins_; + } + + //Compare to keys within a bin if true new_val will be put before old_val + template + KOKKOS_INLINE_FUNCTION + bool operator()(ViewType& keys, iType1& i1, iType2& i2) const { + return keys(i1) +struct DefaultBinOp3D { + int max_bins_[3]; + double mul_[3]; + typename KeyViewType::non_const_value_type range_[3]; + typename KeyViewType::non_const_value_type min_[3]; + + DefaultBinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[], + typename KeyViewType::const_value_type max[] ) + { + max_bins_[0] = max_bins__[0]+1; + max_bins_[1] = max_bins__[1]+1; + max_bins_[2] = max_bins__[2]+1; + mul_[0] = 1.0*max_bins__[0]/(max[0]-min[0]); + mul_[1] = 1.0*max_bins__[1]/(max[1]-min[1]); + mul_[2] = 1.0*max_bins__[2]/(max[2]-min[2]); + range_[0] = max[0]-min[0]; + range_[1] = max[1]-min[1]; + range_[2] = max[2]-min[2]; + min_[0] = min[0]; + min_[1] = min[1]; + min_[2] = min[2]; + } + + template + KOKKOS_INLINE_FUNCTION + int bin(ViewType& keys, const int& i) const { + return int( 
(((int(mul_[0]*(keys(i,0)-min_[0]))*max_bins_[1]) + + int(mul_[1]*(keys(i,1)-min_[1])))*max_bins_[2]) + + int(mul_[2]*(keys(i,2)-min_[2]))); + } + + KOKKOS_INLINE_FUNCTION + int max_bins() const { + return max_bins_[0]*max_bins_[1]*max_bins_[2]; + } + + template + KOKKOS_INLINE_FUNCTION + bool operator()(ViewType& keys, iType1& i1 , iType2& i2) const { // true when key i1 is lexicographically greater than key i2 over dimensions 0,1,2 + if (keys(i1,0)>keys(i2,0)) return true; + else if (keys(i1,0)==keys(i2,0)) { + if (keys(i1,1)>keys(i2,1)) return true; + else if (keys(i1,1)==keys(i2,1)) { // tie on dimension 1 (compare column 1 with column 1): decide on dimension 2 + if (keys(i1,2)>keys(i2,2)) return true; + } + } + return false; + } +}; + +template +struct min_max { + Scalar min; + Scalar max; + bool init; + + KOKKOS_INLINE_FUNCTION + min_max() { + min = 0; + max = 0; + init = 0; + } + + KOKKOS_INLINE_FUNCTION + min_max (const min_max& val) { + min = val.min; + max = val.max; + init = val.init; + } + + KOKKOS_INLINE_FUNCTION + min_max operator = (const min_max& val) { + min = val.min; + max = val.max; + init = val.init; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator+= (const Scalar& val) { + if(init) { + min = minval?max:val; + } else { + min = val; + max = val; + init = 1; + } + } + + KOKKOS_INLINE_FUNCTION + void operator+= (const min_max& val) { + if(init && val.init) { + min = minval.max?max:val.max; + } else { + if(val.init) { + min = val.min; + max = val.max; + init = 1; + } + } + } + + KOKKOS_INLINE_FUNCTION + void operator+= (volatile const Scalar& val) volatile { + if(init) { + min = minval?max:val; + } else { + min = val; + max = val; + init = 1; + } + } + + KOKKOS_INLINE_FUNCTION + void operator+= (volatile const min_max& val) volatile { + if(init && val.init) { + min = minval.max?max:val.max; + } else { + if(val.init) { + min = val.min; + max = val.max; + init = 1; + } + } + } +}; + + +template +struct min_max_functor { + typedef typename ViewType::execution_space execution_space; + ViewType view; + typedef min_max value_type; + min_max_functor (const ViewType view_):view(view_) { + } + + 
KOKKOS_INLINE_FUNCTION + void operator()(const size_t& i, value_type& val) const { + val += view(i); + } +}; + +template +bool try_std_sort(ViewType view) { + bool possible = true; +#if ! KOKKOS_USING_EXP_VIEW + size_t stride[8]; + view.stride(stride); +#else + size_t stride[8] = { view.stride_0() + , view.stride_1() + , view.stride_2() + , view.stride_3() + , view.stride_4() + , view.stride_5() + , view.stride_6() + , view.stride_7() + }; +#endif + possible = possible && Impl::is_same::value; + possible = possible && (ViewType::Rank == 1); + possible = possible && (stride[0] == 1); + if(possible) { + std::sort(view.ptr_on_device(),view.ptr_on_device()+view.dimension_0()); + } + return possible; +} + +} + +template +void sort(ViewType view, bool always_use_kokkos_sort = false) { + if(!always_use_kokkos_sort) { + if(SortImpl::try_std_sort(view)) return; + } + + typedef SortImpl::DefaultBinOp1D CompType; + SortImpl::min_max val; + parallel_reduce(view.dimension_0(),SortImpl::min_max_functor(view),val); + BinSort bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true); + bin_sort.create_permute_vector(); + bin_sort.sort(view); +} + +/*template +void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) { + +}*/ + +} + +#endif diff --git a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt new file mode 100644 index 0000000000..654104b44e --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt @@ -0,0 +1,38 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) + +SET(SOURCES + UnitTestMain.cpp + TestCuda.cpp + ) + +SET(LIBRARIES kokkoscore) + +IF(Kokkos_ENABLE_OpenMP) + LIST( APPEND SOURCES + TestOpenMP.cpp + ) +ENDIF() + +IF(Kokkos_ENABLE_Serial) + LIST( APPEND SOURCES + TestSerial.cpp + ) +ENDIF() + +IF(Kokkos_ENABLE_Pthread) + LIST( APPEND SOURCES + TestThreads.cpp + ) 
+ENDIF() + +TRIBITS_ADD_EXECUTABLE_AND_TEST( + UnitTest + SOURCES ${SOURCES} + COMM serial mpi + NUM_MPI_PROCS 1 + FAIL_REGULAR_EXPRESSION " FAILED " + TESTONLYLIBS kokkos_gtest + ) diff --git a/lib/kokkos/algorithms/unit_tests/Makefile b/lib/kokkos/algorithms/unit_tests/Makefile new file mode 100644 index 0000000000..5d79364c52 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/Makefile @@ -0,0 +1,92 @@ +KOKKOS_PATH = ../.. + +GTEST_PATH = ../../TPL/gtest + +vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests + +default: build_all + echo "End Build" + + +include $(KOKKOS_PATH)/Makefile.kokkos + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + CXX = $(NVCC_WRAPPER) + CXXFLAGS ?= -O3 + LINK = $(CXX) + LDFLAGS ?= -lpthread +else + CXX ?= g++ + CXXFLAGS ?= -O3 + LINK ?= $(CXX) + LDFLAGS ?= -lpthread +endif + +KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests + +TEST_TARGETS = +TARGETS = + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_Cuda + TEST_TARGETS += test-cuda +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + OBJ_THREADS = TestThreads.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_Threads + TEST_TARGETS += test-threads +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + OBJ_OPENMP = TestOpenMP.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_OpenMP + TEST_TARGETS += test-openmp +endif + +ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1) + OBJ_SERIAL = TestSerial.o UnitTestMain.o gtest-all.o + TARGETS += KokkosAlgorithms_UnitTest_Serial + TEST_TARGETS += test-serial +endif + +KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Cuda + +KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o 
KokkosAlgorithms_UnitTest_Threads + +KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_OpenMP + +KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Serial + +test-cuda: KokkosAlgorithms_UnitTest_Cuda + ./KokkosAlgorithms_UnitTest_Cuda + +test-threads: KokkosAlgorithms_UnitTest_Threads + ./KokkosAlgorithms_UnitTest_Threads + +test-openmp: KokkosAlgorithms_UnitTest_OpenMP + ./KokkosAlgorithms_UnitTest_OpenMP + +test-serial: KokkosAlgorithms_UnitTest_Serial + ./KokkosAlgorithms_UnitTest_Serial + +build_all: $(TARGETS) + +test: $(TEST_TARGETS) + +clean: kokkos-clean + rm -f *.o $(TARGETS) + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc + diff --git a/lib/kokkos/algorithms/unit_tests/TestCuda.cpp b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp new file mode 100644 index 0000000000..d19c778c46 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp @@ -0,0 +1,110 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include + +#include + +#include + +#ifdef KOKKOS_HAVE_CUDA + +#include +#include + +namespace Test { + +class cuda : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + Kokkos::HostSpace::execution_space::initialize(); + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); + } + static void TearDownTestCase() + { + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); + } +}; + +void cuda_test_random_xorshift64( int num_draws ) +{ + Impl::test_random >(num_draws); +} + +void cuda_test_random_xorshift1024( int num_draws ) +{ + Impl::test_random >(num_draws); +} + + +#define CUDA_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( cuda, Random_XorShift64 ) { \ + cuda_test_random_xorshift64(num_draws); \ + } + +#define CUDA_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( cuda, Random_XorShift1024 ) { \ + cuda_test_random_xorshift1024(num_draws); \ + } + +#define CUDA_SORT_UNSIGNED( size ) \ + TEST_F( cuda, SortUnsigned ) { \ + Impl::test_sort< Kokkos::Cuda, unsigned >(size); \ + } + +CUDA_RANDOM_XORSHIFT64( 132141141 ) +CUDA_RANDOM_XORSHIFT1024( 52428813 ) +CUDA_SORT_UNSIGNED(171) + +#undef CUDA_RANDOM_XORSHIFT64 +#undef CUDA_RANDOM_XORSHIFT1024 +#undef CUDA_SORT_UNSIGNED +} + +#endif /* #ifdef KOKKOS_HAVE_CUDA */ + diff --git a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp new file mode 100644 index 0000000000..4b06dffcb6 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp @@ -0,0 +1,102 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. 
Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include + +//---------------------------------------------------------------------------- +#include +#include +#include + +namespace Test { + +#ifdef KOKKOS_HAVE_OPENMP +class openmp : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned threads_count = omp_get_max_threads(); + + if ( Kokkos::hwloc::available() ) { + threads_count = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa(); + } + + Kokkos::OpenMP::initialize( threads_count ); + } + + static void TearDownTestCase() + { + Kokkos::OpenMP::finalize(); + } +}; + +#define OPENMP_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( openmp, Random_XorShift64 ) { \ + Impl::test_random >(num_draws); \ + } + +#define OPENMP_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( openmp, Random_XorShift1024 ) { \ + Impl::test_random >(num_draws); \ + } + +#define OPENMP_SORT_UNSIGNED( size ) \ + TEST_F( openmp, SortUnsigned ) { \ + Impl::test_sort< Kokkos::OpenMP, unsigned >(size); \ + } + +OPENMP_RANDOM_XORSHIFT64( 10240000 ) +OPENMP_RANDOM_XORSHIFT1024( 10130144 ) +OPENMP_SORT_UNSIGNED(171) + +#undef OPENMP_RANDOM_XORSHIFT64 +#undef OPENMP_RANDOM_XORSHIFT1024 +#undef OPENMP_SORT_UNSIGNED +#endif +} // namespace test + diff --git a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp new file mode 100644 index 0000000000..c906b9f2cd --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp @@ -0,0 +1,481 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_DUALVIEW_HPP +#define KOKKOS_TEST_DUALVIEW_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Test { + +namespace Impl{ + +// This test runs the random number generators and uses some statistical tests to +// check the 'goodness' of the random numbers: +// (i) mean: the mean is expected to be 0.5*RAND_MAX +// (ii) variance: the variance is 1/3*mean*mean +// (iii) covariance: the covariance is 0 +// (iv) 1-tupledistr: the mean, variance and covariance of a 1D histogram of random numbers +// (v) 3-tupledistr: the mean, variance and covariance of a 3D histogram of random numbers + +#define HIST_DIM3D 24 +#define HIST_DIM1D (HIST_DIM3D*HIST_DIM3D*HIST_DIM3D) + +struct RandomProperties { + uint64_t count; + double mean; + double variance; + double covariance; + double min; + double max; + + KOKKOS_INLINE_FUNCTION + RandomProperties() { + count = 0; + mean = 0.0; + variance = 0.0; + covariance = 0.0; + min = 1e64; + max = -1e64; + } + + KOKKOS_INLINE_FUNCTION + RandomProperties& operator+=(const RandomProperties& add) { + count += add.count; + mean += add.mean; + variance += add.variance; + covariance += add.covariance; + min = add.minmax?add.max:max; + return *this; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const volatile RandomProperties& add) volatile { + count += add.count; + mean += add.mean; + variance += add.variance; + covariance += add.covariance; + min = add.minmax?add.max:max; + } +}; + +template +struct test_random_functor { + typedef typename GeneratorPool::generator_type rnd_type; + + typedef RandomProperties value_type; + typedef typename GeneratorPool::device_type device_type; + + GeneratorPool rand_pool; + const double mean; + + // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define + // an exclusive upper bound on the range of 
random numbers that + // draw() can generate. However, for the float specialization, some + // implementations might violate this upper bound, due to rounding + // error. Just in case, we leave an extra space at the end of each + // dimension, in the View types below. + typedef Kokkos::View type_1d; + type_1d density_1d; + typedef Kokkos::View type_3d; + type_3d density_3d; + + test_random_functor (GeneratorPool rand_pool_, type_1d d1d, type_3d d3d) : + rand_pool (rand_pool_), + mean (0.5*Kokkos::rand::max ()), + density_1d (d1d), + density_3d (d3d) + {} + + KOKKOS_INLINE_FUNCTION + void operator() (int i, RandomProperties& prop) const { + using Kokkos::atomic_fetch_add; + + rnd_type rand_gen = rand_pool.get_state(); + for (int k = 0; k < 1024; ++k) { + const Scalar tmp = Kokkos::rand::draw(rand_gen); + prop.count++; + prop.mean += tmp; + prop.variance += (tmp-mean)*(tmp-mean); + const Scalar tmp2 = Kokkos::rand::draw(rand_gen); + prop.count++; + prop.mean += tmp2; + prop.variance += (tmp2-mean)*(tmp2-mean); + prop.covariance += (tmp-mean)*(tmp2-mean); + const Scalar tmp3 = Kokkos::rand::draw(rand_gen); + prop.count++; + prop.mean += tmp3; + prop.variance += (tmp3-mean)*(tmp3-mean); + prop.covariance += (tmp2-mean)*(tmp3-mean); + + // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to + // define an exclusive upper bound on the range of random + // numbers that draw() can generate. However, for the float + // specialization, some implementations might violate this upper + // bound, due to rounding error. Just in case, we have left an + // extra space at the end of each dimension of density_1d and + // density_3d. + // + // Please note that those extra entries might not get counted in + // the histograms. However, if Kokkos::rand is broken and only + // returns values of max(), the histograms will still catch this + // indirectly, since none of the other values will be filled in. 
+ + const Scalar theMax = Kokkos::rand::max (); + + const uint64_t ind1_1d = static_cast (1.0 * HIST_DIM1D * tmp / theMax); + const uint64_t ind2_1d = static_cast (1.0 * HIST_DIM1D * tmp2 / theMax); + const uint64_t ind3_1d = static_cast (1.0 * HIST_DIM1D * tmp3 / theMax); + + const uint64_t ind1_3d = static_cast (1.0 * HIST_DIM3D * tmp / theMax); + const uint64_t ind2_3d = static_cast (1.0 * HIST_DIM3D * tmp2 / theMax); + const uint64_t ind3_3d = static_cast (1.0 * HIST_DIM3D * tmp3 / theMax); + + atomic_fetch_add (&density_1d(ind1_1d), 1); + atomic_fetch_add (&density_1d(ind2_1d), 1); + atomic_fetch_add (&density_1d(ind3_1d), 1); + atomic_fetch_add (&density_3d(ind1_3d, ind2_3d, ind3_3d), 1); + } + rand_pool.free_state(rand_gen); + } +}; + +template +struct test_histogram1d_functor { + typedef RandomProperties value_type; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + + // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define + // an exclusive upper bound on the range of random numbers that + // draw() can generate. However, for the float specialization, some + // implementations might violate this upper bound, due to rounding + // error. Just in case, we leave an extra space at the end of each + // dimension, in the View type below. + typedef Kokkos::View type_1d; + type_1d density_1d; + double mean; + + test_histogram1d_functor (type_1d d1d, int num_draws) : + density_1d (d1d), + mean (1.0*num_draws/HIST_DIM1D*3) + { + } + + KOKKOS_INLINE_FUNCTION void + operator() (const typename memory_space::size_type i, + RandomProperties& prop) const + { + typedef typename memory_space::size_type size_type; + const double count = density_1d(i); + prop.mean += count; + prop.variance += 1.0 * (count - mean) * (count - mean); + //prop.covariance += 1.0*count*count; + prop.min = count < prop.min ? count : prop.min; + prop.max = count > prop.max ? 
count : prop.max; + if (i < static_cast (HIST_DIM1D-1)) { + prop.covariance += (count - mean) * (density_1d(i+1) - mean); + } + } +}; + +template +struct test_histogram3d_functor { + typedef RandomProperties value_type; + typedef typename DeviceType::execution_space execution_space; + typedef typename DeviceType::memory_space memory_space; + + // NOTE (mfh 03 Nov 2014): Kokkos::rand::max() is supposed to define + // an exclusive upper bound on the range of random numbers that + // draw() can generate. However, for the float specialization, some + // implementations might violate this upper bound, due to rounding + // error. Just in case, we leave an extra space at the end of each + // dimension, in the View type below. + typedef Kokkos::View type_3d; + type_3d density_3d; + double mean; + + test_histogram3d_functor (type_3d d3d, int num_draws) : + density_3d (d3d), + mean (1.0*num_draws/HIST_DIM1D) + {} + + KOKKOS_INLINE_FUNCTION void + operator() (const typename memory_space::size_type i, + RandomProperties& prop) const + { + typedef typename memory_space::size_type size_type; + const double count = density_3d(i/(HIST_DIM3D*HIST_DIM3D), + (i % (HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D, + i % HIST_DIM3D); + prop.mean += count; + prop.variance += (count - mean) * (count - mean); + if (i < static_cast (HIST_DIM1D-1)) { + const double count_next = density_3d((i+1)/(HIST_DIM3D*HIST_DIM3D), + ((i+1)%(HIST_DIM3D*HIST_DIM3D))/HIST_DIM3D, + (i+1)%HIST_DIM3D); + prop.covariance += (count - mean) * (count_next - mean); + } + } +}; + +// +// Templated test that uses the above functors. 
+// +template +struct test_random_scalar { + typedef typename RandomGenerator::generator_type rnd_type; + + int pass_mean,pass_var,pass_covar; + int pass_hist1d_mean,pass_hist1d_var,pass_hist1d_covar; + int pass_hist3d_mean,pass_hist3d_var,pass_hist3d_covar; + + test_random_scalar (typename test_random_functor::type_1d& density_1d, + typename test_random_functor::type_3d& density_3d, + RandomGenerator& pool, + unsigned int num_draws) + { + using std::cerr; + using std::endl; + using Kokkos::parallel_reduce; + + { + cerr << " -- Testing randomness properties" << endl; + + RandomProperties result; + typedef test_random_functor functor_type; + parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result); + + //printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2); + double tolerance = 1.6*sqrt(1.0/num_draws); + double mean_expect = 0.5*Kokkos::rand::max(); + double variance_expect = 1.0/3.0*mean_expect*mean_expect; + double mean_eps = mean_expect/(result.mean/num_draws/3)-1.0; + double variance_eps = variance_expect/(result.variance/num_draws/3)-1.0; + double covariance_eps = result.covariance/num_draws/2/variance_expect; + pass_mean = ((-tolerance < mean_eps) && + ( tolerance > mean_eps)) ? 1:0; + pass_var = ((-1.5*tolerance < variance_eps) && + ( 1.5*tolerance > variance_eps)) ? 1:0; + pass_covar = ((-2.0*tolerance < covariance_eps) && + ( 2.0*tolerance > covariance_eps)) ? 
1:0; + cerr << "Pass: " << pass_mean + << " " << pass_var + << " " << mean_eps + << " " << variance_eps + << " " << covariance_eps + << " || " << tolerance << endl; + } + { + cerr << " -- Testing 1-D histogram" << endl; + + RandomProperties result; + typedef test_histogram1d_functor functor_type; + parallel_reduce (HIST_DIM1D, functor_type (density_1d, num_draws), result); + + double tolerance = 6*sqrt(1.0/HIST_DIM1D); + double mean_expect = 1.0*num_draws*3/HIST_DIM1D; + double variance_expect = 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); + double covariance_expect = -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D; + double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0; + double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0; + double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect; + pass_hist1d_mean = ((-0.0001 < mean_eps) && + ( 0.0001 > mean_eps)) ? 1:0; + pass_hist1d_var = ((-0.07 < variance_eps) && + ( 0.07 > variance_eps)) ? 1:0; + pass_hist1d_covar = ((-0.06 < covariance_eps) && + ( 0.06 > covariance_eps)) ? 
1:0; + + cerr << "Density 1D: " << mean_eps + << " " << variance_eps + << " " << (result.covariance/HIST_DIM1D/HIST_DIM1D) + << " || " << tolerance + << " " << result.min + << " " << result.max + << " || " << result.variance/HIST_DIM1D + << " " << 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D) + << " || " << result.covariance/HIST_DIM1D + << " " << -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D + << endl; + } + { + cerr << " -- Testing 3-D histogram" << endl; + + RandomProperties result; + typedef test_histogram3d_functor functor_type; + parallel_reduce (HIST_DIM1D, functor_type (density_3d, num_draws), result); + + double tolerance = 6*sqrt(1.0/HIST_DIM1D); + double mean_expect = 1.0*num_draws/HIST_DIM1D; + double variance_expect = 1.0*num_draws/HIST_DIM1D*(1.0-1.0/HIST_DIM1D); + double covariance_expect = -1.0*num_draws/HIST_DIM1D/HIST_DIM1D; + double mean_eps = mean_expect/(result.mean/HIST_DIM1D)-1.0; + double variance_eps = variance_expect/(result.variance/HIST_DIM1D)-1.0; + double covariance_eps = (result.covariance/HIST_DIM1D - covariance_expect)/mean_expect; + pass_hist3d_mean = ((-tolerance < mean_eps) && + ( tolerance > mean_eps)) ? 1:0; + pass_hist3d_var = ((-1.2*tolerance < variance_eps) && + ( 1.2*tolerance > variance_eps)) ? 1:0; + pass_hist3d_covar = ((-tolerance < covariance_eps) && + ( tolerance > covariance_eps)) ? 
1:0; + + cerr << "Density 3D: " << mean_eps + << " " << variance_eps + << " " << result.covariance/HIST_DIM1D/HIST_DIM1D + << " || " << tolerance + << " " << result.min + << " " << result.max << endl; + } + } +}; + +template +void test_random(unsigned int num_draws) +{ + using std::cerr; + using std::endl; + typename test_random_functor::type_1d density_1d("D1d"); + typename test_random_functor::type_3d density_3d("D3d"); + + + uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count(); + cerr << "Test Seed:" << ticks << endl; + + RandomGenerator pool(ticks); + + cerr << "Test Scalar=int" << endl; + test_random_scalar test_int(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_int.pass_mean,1); + ASSERT_EQ( test_int.pass_var,1); + ASSERT_EQ( test_int.pass_covar,1); + ASSERT_EQ( test_int.pass_hist1d_mean,1); + ASSERT_EQ( test_int.pass_hist1d_var,1); + ASSERT_EQ( test_int.pass_hist1d_covar,1); + ASSERT_EQ( test_int.pass_hist3d_mean,1); + ASSERT_EQ( test_int.pass_hist3d_var,1); + ASSERT_EQ( test_int.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=unsigned int" << endl; + test_random_scalar test_uint(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_uint.pass_mean,1); + ASSERT_EQ( test_uint.pass_var,1); + ASSERT_EQ( test_uint.pass_covar,1); + ASSERT_EQ( test_uint.pass_hist1d_mean,1); + ASSERT_EQ( test_uint.pass_hist1d_var,1); + ASSERT_EQ( test_uint.pass_hist1d_covar,1); + ASSERT_EQ( test_uint.pass_hist3d_mean,1); + ASSERT_EQ( test_uint.pass_hist3d_var,1); + ASSERT_EQ( test_uint.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=int64_t" << endl; + test_random_scalar test_int64(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_int64.pass_mean,1); + ASSERT_EQ( test_int64.pass_var,1); + ASSERT_EQ( test_int64.pass_covar,1); + ASSERT_EQ( test_int64.pass_hist1d_mean,1); + ASSERT_EQ( test_int64.pass_hist1d_var,1); + ASSERT_EQ( 
test_int64.pass_hist1d_covar,1); + ASSERT_EQ( test_int64.pass_hist3d_mean,1); + ASSERT_EQ( test_int64.pass_hist3d_var,1); + ASSERT_EQ( test_int64.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=uint64_t" << endl; + test_random_scalar test_uint64(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_uint64.pass_mean,1); + ASSERT_EQ( test_uint64.pass_var,1); + ASSERT_EQ( test_uint64.pass_covar,1); + ASSERT_EQ( test_uint64.pass_hist1d_mean,1); + ASSERT_EQ( test_uint64.pass_hist1d_var,1); + ASSERT_EQ( test_uint64.pass_hist1d_covar,1); + ASSERT_EQ( test_uint64.pass_hist3d_mean,1); + ASSERT_EQ( test_uint64.pass_hist3d_var,1); + ASSERT_EQ( test_uint64.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=float" << endl; + test_random_scalar test_float(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_float.pass_mean,1); + ASSERT_EQ( test_float.pass_var,1); + ASSERT_EQ( test_float.pass_covar,1); + ASSERT_EQ( test_float.pass_hist1d_mean,1); + ASSERT_EQ( test_float.pass_hist1d_var,1); + ASSERT_EQ( test_float.pass_hist1d_covar,1); + ASSERT_EQ( test_float.pass_hist3d_mean,1); + ASSERT_EQ( test_float.pass_hist3d_var,1); + ASSERT_EQ( test_float.pass_hist3d_covar,1); + deep_copy(density_1d,0); + deep_copy(density_3d,0); + + cerr << "Test Scalar=double" << endl; + test_random_scalar test_double(density_1d,density_3d,pool,num_draws); + ASSERT_EQ( test_double.pass_mean,1); + ASSERT_EQ( test_double.pass_var,1); + ASSERT_EQ( test_double.pass_covar,1); + ASSERT_EQ( test_double.pass_hist1d_mean,1); + ASSERT_EQ( test_double.pass_hist1d_var,1); + ASSERT_EQ( test_double.pass_hist1d_covar,1); + ASSERT_EQ( test_double.pass_hist3d_mean,1); + ASSERT_EQ( test_double.pass_hist3d_var,1); + ASSERT_EQ( test_double.pass_hist3d_covar,1); +} +} + +} // namespace Test + +#endif //KOKKOS_TEST_UNORDERED_MAP_HPP diff --git a/lib/kokkos/algorithms/unit_tests/TestSerial.cpp 
b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp new file mode 100644 index 0000000000..741cf97ae1 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp @@ -0,0 +1,99 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include + +#include +#include +#include + + +//---------------------------------------------------------------------------- + + +namespace Test { + +#ifdef KOKKOS_HAVE_SERIAL +class serial : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision (5) << std::scientific; + Kokkos::Serial::initialize (); + } + + static void TearDownTestCase () + { + Kokkos::Serial::finalize (); + } +}; + +#define SERIAL_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( serial, Random_XorShift64 ) { \ + Impl::test_random >(num_draws); \ + } + +#define SERIAL_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( serial, Random_XorShift1024 ) { \ + Impl::test_random >(num_draws); \ + } + +#define SERIAL_SORT_UNSIGNED( size ) \ + TEST_F( serial, SortUnsigned ) { \ + Impl::test_sort< Kokkos::Serial, unsigned >(size); \ + } + +SERIAL_RANDOM_XORSHIFT64( 10240000 ) +SERIAL_RANDOM_XORSHIFT1024( 10130144 ) +SERIAL_SORT_UNSIGNED(171) + +#undef SERIAL_RANDOM_XORSHIFT64 +#undef SERIAL_RANDOM_XORSHIFT1024 +#undef SERIAL_SORT_UNSIGNED + +#endif // KOKKOS_HAVE_SERIAL +} // namespace Test + + diff --git a/lib/kokkos/algorithms/unit_tests/TestSort.hpp b/lib/kokkos/algorithms/unit_tests/TestSort.hpp new file mode 100644 index 0000000000..ccbcbdd001 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp @@ -0,0 +1,206 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef TESTSORT_HPP_ +#define TESTSORT_HPP_ + +#include +#include +#include +#include + +namespace Test { + +namespace Impl{ + +template +struct is_sorted_struct { + typedef unsigned int value_type; + typedef ExecutionSpace execution_space; + + Kokkos::View keys; + + is_sorted_struct(Kokkos::View keys_):keys(keys_) {} + KOKKOS_INLINE_FUNCTION + void operator() (int i, unsigned int& count) const { + if(keys(i)>keys(i+1)) count++; + } +}; + +template +struct sum { + typedef double value_type; + typedef ExecutionSpace execution_space; + + Kokkos::View keys; + + sum(Kokkos::View keys_):keys(keys_) {} + KOKKOS_INLINE_FUNCTION + void operator() (int i, double& count) const { + count+=keys(i); + } +}; + +template +struct bin3d_is_sorted_struct { + typedef unsigned int value_type; + typedef ExecutionSpace execution_space; + + Kokkos::View keys; + + int max_bins; + Scalar min; + Scalar max; + + bin3d_is_sorted_struct(Kokkos::View keys_,int max_bins_,Scalar min_,Scalar max_): + keys(keys_),max_bins(max_bins_),min(min_),max(max_) { + } + KOKKOS_INLINE_FUNCTION + void operator() (int i, unsigned int& count) const { + int ix1 = int ((keys(i,0)-min)/max * max_bins); + int iy1 = int ((keys(i,1)-min)/max * max_bins); + int iz1 = int ((keys(i,2)-min)/max * max_bins); + int ix2 = int ((keys(i+1,0)-min)/max * max_bins); + int iy2 = int ((keys(i+1,1)-min)/max * max_bins); + int iz2 = int ((keys(i+1,2)-min)/max * max_bins); + + if (ix1>ix2) count++; + else if(ix1==ix2) { + if (iy1>iy2) count++; + else if ((iy1==iy2) && (iz1>iz2)) count++; + } + } +}; + +template +struct sum3D { + typedef double value_type; + typedef ExecutionSpace execution_space; + + Kokkos::View keys; + + sum3D(Kokkos::View keys_):keys(keys_) {} + KOKKOS_INLINE_FUNCTION + void operator() (int i, double& count) const { + count+=keys(i,0); + count+=keys(i,1); + count+=keys(i,2); + } +}; + 
+template +void test_1D_sort(unsigned int n,bool force_kokkos) { + typedef Kokkos::View KeyViewType; + KeyViewType keys("Keys",n); + + Kokkos::Random_XorShift64_Pool g(1931); + Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool::generator_type::MAX_URAND); + + double sum_before = 0.0; + double sum_after = 0.0; + unsigned int sort_fails = 0; + + Kokkos::parallel_reduce(n,sum(keys),sum_before); + + Kokkos::sort(keys,force_kokkos); + + Kokkos::parallel_reduce(n,sum(keys),sum_after); + Kokkos::parallel_reduce(n-1,is_sorted_struct(keys),sort_fails); + + double ratio = sum_before/sum_after; + double epsilon = 1e-10; + unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 1 : 0; + + ASSERT_EQ(sort_fails,0); + ASSERT_EQ(equal_sum,1); +} + +template +void test_3D_sort(unsigned int n) { + typedef Kokkos::View KeyViewType; + + KeyViewType keys("Keys",n*n*n); + + Kokkos::Random_XorShift64_Pool g(1931); + Kokkos::fill_random(keys,g,100.0); + + double sum_before = 0.0; + double sum_after = 0.0; + unsigned int sort_fails = 0; + + Kokkos::parallel_reduce(keys.dimension_0(),sum3D(keys),sum_before); + + int bin_1d = 1; + while( bin_1d*bin_1d*bin_1d*4< (int) keys.dimension_0() ) bin_1d*=2; + int bin_max[3] = {bin_1d,bin_1d,bin_1d}; + typename KeyViewType::value_type min[3] = {0,0,0}; + typename KeyViewType::value_type max[3] = {100,100,100}; + + typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp; + BinOp bin_op(bin_max,min,max); + Kokkos::BinSort< KeyViewType , BinOp > + Sorter(keys,bin_op,false); + Sorter.create_permute_vector(); + Sorter.template sort< KeyViewType >(keys); + + Kokkos::parallel_reduce(keys.dimension_0(),sum3D(keys),sum_after); + Kokkos::parallel_reduce(keys.dimension_0()-1,bin3d_is_sorted_struct(keys,bin_1d,min[0],max[0]),sort_fails); + + double ratio = sum_before/sum_after; + double epsilon = 1e-10; + unsigned int equal_sum = (ratio > (1.0-epsilon)) && (ratio < (1.0+epsilon)) ? 
1 : 0; + + printf("3D Sort Sum: %f %f Fails: %u\n",sum_before,sum_after,sort_fails); + ASSERT_EQ(sort_fails,0); + ASSERT_EQ(equal_sum,1); +} + +template +void test_sort(unsigned int N) +{ + test_1D_sort(N*N*N, true); + test_1D_sort(N*N*N, false); + test_3D_sort(N); +} + +} +} +#endif /* TESTSORT_HPP_ */ diff --git a/lib/kokkos/algorithms/unit_tests/TestThreads.cpp b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp new file mode 100644 index 0000000000..a61d6c8bd5 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp @@ -0,0 +1,113 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include + +#include +#include +#include + + +//---------------------------------------------------------------------------- + + +namespace Test { + +#ifdef KOKKOS_HAVE_PTHREAD +class threads : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned num_threads = 4; + + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() + * Kokkos::hwloc::get_available_cores_per_numa() + // * Kokkos::hwloc::get_available_threads_per_core() + ; + + } + + std::cout << "Threads: " << num_threads << std::endl; + + Kokkos::Threads::initialize( num_threads ); + } + + static void TearDownTestCase() + { + Kokkos::Threads::finalize(); + } +}; + +#define THREADS_RANDOM_XORSHIFT64( num_draws ) \ + TEST_F( threads, Random_XorShift64 ) { \ + Impl::test_random >(num_draws); \ + } + +#define THREADS_RANDOM_XORSHIFT1024( num_draws ) \ + TEST_F( threads, Random_XorShift1024 ) { \ + Impl::test_random >(num_draws); \ + } + +#define THREADS_SORT_UNSIGNED( size ) \ + TEST_F( threads, SortUnsigned ) { \ + Impl::test_sort< Kokkos::Threads, double >(size); \ + } + + +THREADS_RANDOM_XORSHIFT64( 10240000 ) +THREADS_RANDOM_XORSHIFT1024( 10130144 ) +THREADS_SORT_UNSIGNED(171) + 
+#undef THREADS_RANDOM_XORSHIFT64 +#undef THREADS_RANDOM_XORSHIFT1024 +#undef THREADS_SORT_UNSIGNED + +#endif +} // namespace Test + + diff --git a/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp new file mode 100644 index 0000000000..f952ab3db5 --- /dev/null +++ b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp @@ -0,0 +1,50 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} + diff --git a/lib/kokkos/cmake/Dependencies.cmake b/lib/kokkos/cmake/Dependencies.cmake new file mode 100644 index 0000000000..8c51eab4d7 --- /dev/null +++ b/lib/kokkos/cmake/Dependencies.cmake @@ -0,0 +1,10 @@ +TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( + SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS + #SubPackageName Directory Class Req/Opt + # + # New Kokkos subpackages: + Core core PS REQUIRED + Containers containers PS OPTIONAL + Algorithms algorithms PS OPTIONAL + Example example EX OPTIONAL + ) diff --git a/lib/kokkos/cmake/deps/CUDA.cmake b/lib/kokkos/cmake/deps/CUDA.cmake new file mode 100644 index 0000000000..801c20067b --- /dev/null +++ b/lib/kokkos/cmake/deps/CUDA.cmake @@ -0,0 +1,79 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. 
+# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + +# Check for CUDA support + +SET(_CUDA_FAILURE OFF) + +# Have CMake find CUDA +IF(NOT _CUDA_FAILURE) + FIND_PACKAGE(CUDA 3.2) + IF (NOT CUDA_FOUND) + SET(_CUDA_FAILURE ON) + ENDIF() +ENDIF() + +IF(NOT _CUDA_FAILURE) + # if we haven't met failure + macro(PACKAGE_ADD_CUDA_LIBRARY cuda_target) + TRIBITS_ADD_LIBRARY(${cuda_target} ${ARGN} CUDALIBRARY) + endmacro() + GLOBAL_SET(TPL_CUDA_LIBRARY_DIRS) + GLOBAL_SET(TPL_CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE}) + GLOBAL_SET(TPL_CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_cufft_LIBRARY}) + TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) +ELSE() + SET(TPL_ENABLE_CUDA OFF) +ENDIF() diff --git a/lib/kokkos/cmake/deps/CUSPARSE.cmake b/lib/kokkos/cmake/deps/CUSPARSE.cmake new file mode 100644 index 0000000000..205f5e2a98 --- /dev/null +++ b/lib/kokkos/cmake/deps/CUSPARSE.cmake @@ -0,0 +1,64 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. 
+# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + +include(${TRIBITS_DEPS_DIR}/CUDA.cmake) + +IF (TPL_ENABLE_CUDA) + GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) + GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) + GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) + TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE) +ENDIF() + diff --git a/lib/kokkos/cmake/deps/HWLOC.cmake b/lib/kokkos/cmake/deps/HWLOC.cmake new file mode 100644 index 0000000000..275abd3a5d --- /dev/null +++ b/lib/kokkos/cmake/deps/HWLOC.cmake @@ -0,0 +1,70 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. 
Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
+# +# ************************************************************************ +# @HEADER + + +#----------------------------------------------------------------------------- +# Hardware locality detection and control library. +# +# Acquisition information: +# Date checked: November 2011 +# Checked by: H. Carter Edwards +# Source: http://www.open-mpi.org/projects/hwloc/ +# Version: 1.3 +# + +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC + REQUIRED_HEADERS hwloc.h + REQUIRED_LIBS_NAMES "hwloc" + ) diff --git a/lib/kokkos/cmake/deps/Pthread.cmake b/lib/kokkos/cmake/deps/Pthread.cmake new file mode 100644 index 0000000000..46d0a939ca --- /dev/null +++ b/lib/kokkos/cmake/deps/Pthread.cmake @@ -0,0 +1,83 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + + +SET(USE_THREADS FALSE) + +IF(NOT TPL_Pthread_INCLUDE_DIRS AND NOT TPL_Pthread_LIBRARY_DIRS AND NOT TPL_Pthread_LIBRARIES) + # Use CMake's Thread finder since it is a bit smarter in determining + # whether pthreads is already built into the compiler and doesn't need + # a library to link. 
+ FIND_PACKAGE(Threads) + #If Threads found a copy of pthreads make sure it is one of the cases the tribits + #tpl system cannot handle. + IF(Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) + IF(CMAKE_THREAD_LIBS_INIT STREQUAL "" OR CMAKE_THREAD_LIBS_INIT STREQUAL "-pthread") + SET(USE_THREADS TRUE) + ENDIF() + ENDIF() +ENDIF() + +IF(USE_THREADS) + SET(TPL_Pthread_INCLUDE_DIRS "") + SET(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") + SET(TPL_Pthread_LIBRARY_DIRS "") + TIBITS_CREATE_IMPORTED_TPL_LIBRARY(Pthread) +ELSE() + TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread + REQUIRED_HEADERS pthread.h + REQUIRED_LIBS_NAMES pthread + ) +ENDIF() diff --git a/lib/kokkos/cmake/deps/QTHREAD.cmake b/lib/kokkos/cmake/deps/QTHREAD.cmake new file mode 100644 index 0000000000..994b72b200 --- /dev/null +++ b/lib/kokkos/cmake/deps/QTHREAD.cmake @@ -0,0 +1,70 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
+# +# ************************************************************************ +# @HEADER + + +#----------------------------------------------------------------------------- +# Qthreads multithreading library. +# +# Acquisition information: +# Date checked: July 2014 +# Checked by: H. Carter Edwards +# Source: https://code.google.com/p/qthreads +# + +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD + REQUIRED_HEADERS qthread.h + REQUIRED_LIBS_NAMES "qthread" + ) + diff --git a/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake b/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake new file mode 100644 index 0000000000..aad1e2bad7 --- /dev/null +++ b/lib/kokkos/cmake/tpls/FindTPLCUSPARSE.cmake @@ -0,0 +1,75 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
+# +# ************************************************************************ +# @HEADER + +# Check for CUDA support + +IF (NOT TPL_ENABLE_CUDA OR CUDA_VERSION VERSION_LESS "4.1") + MESSAGE(FATAL_ERROR "\nCUSPARSE: did not find acceptable version of CUDA libraries (4.1 or greater)") +ELSE() + IF(CMAKE_VERSION VERSION_LESS "2.8.8") + # FindCUDA before CMake 2.8.8 does not find the cusparse library; therefore, we must locate it ourselves. + find_library(CUDA_cusparse_LIBRARY + cusparse + HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib + ) + IF(CUDA_cusparse_LIBRARY STREQUAL "CUDA_cusparse_LIBRARY-NOTFOUND") + MESSAGE(FATAL_ERROR "\nCUSPARSE: could not find cusparse library.") + ENDIF() + ENDIF(CMAKE_VERSION VERSION_LESS "2.8.8") + GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS) + GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS}) + GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY}) +ENDIF() + diff --git a/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake b/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake new file mode 100644 index 0000000000..715b3e9bde --- /dev/null +++ b/lib/kokkos/cmake/tpls/FindTPLHWLOC.cmake @@ -0,0 +1,71 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3.
Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
+# +# ************************************************************************ +# @HEADER + + +#----------------------------------------------------------------------------- +# Hardware locality detection and control library. +# +# Acquisition information: +# Date checked: November 2011 +# Checked by: H. Carter Edwards +# Source: http://www.open-mpi.org/projects/hwloc/ +# Version: 1.3 +# + +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( HWLOC + REQUIRED_HEADERS hwloc.h + REQUIRED_LIBS_NAMES "hwloc" + ) + diff --git a/lib/kokkos/cmake/tpls/FindTPLPthread.cmake b/lib/kokkos/cmake/tpls/FindTPLPthread.cmake new file mode 100644 index 0000000000..fc401d7543 --- /dev/null +++ b/lib/kokkos/cmake/tpls/FindTPLPthread.cmake @@ -0,0 +1,82 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. +# +# ************************************************************************ +# @HEADER + + +SET(USE_THREADS FALSE) + +IF(NOT TPL_Pthread_INCLUDE_DIRS AND NOT TPL_Pthread_LIBRARY_DIRS AND NOT TPL_Pthread_LIBRARIES) + # Use CMake's Thread finder since it is a bit smarter in determining + # whether pthreads is already built into the compiler and doesn't need + # a library to link. 
+ FIND_PACKAGE(Threads) + #If Threads found a copy of pthreads make sure it is one of the cases the tribits + #tpl system cannot handle. + IF(Threads_FOUND AND CMAKE_USE_PTHREADS_INIT) + IF(CMAKE_THREAD_LIBS_INIT STREQUAL "" OR CMAKE_THREAD_LIBS_INIT STREQUAL "-pthread") + SET(USE_THREADS TRUE) + ENDIF() + ENDIF() +ENDIF() + +IF(USE_THREADS) + SET(TPL_Pthread_INCLUDE_DIRS "") + SET(TPL_Pthread_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}") + SET(TPL_Pthread_LIBRARY_DIRS "") +ELSE() + TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( Pthread + REQUIRED_HEADERS pthread.h + REQUIRED_LIBS_NAMES pthread + ) +ENDIF() diff --git a/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake b/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake new file mode 100644 index 0000000000..994b72b200 --- /dev/null +++ b/lib/kokkos/cmake/tpls/FindTPLQTHREAD.cmake @@ -0,0 +1,70 @@ +# @HEADER +# ************************************************************************ +# +# Trilinos: An Object-Oriented Solver Framework +# Copyright (2001) Sandia Corporation +# +# +# Copyright (2001) Sandia Corporation. Under the terms of Contract +# DE-AC04-94AL85000, there is a non-exclusive license for use of this +# work by or on behalf of the U.S. Government. Export of this program +# may require a license from the United States Government. +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the Corporation nor the names of the +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# NOTICE: The United States Government is granted for itself and others +# acting on its behalf a paid-up, nonexclusive, irrevocable worldwide +# license in this data to reproduce, prepare derivative works, and +# perform publicly and display publicly. Beginning five (5) years from +# July 25, 2001, the United States Government is granted for itself and +# others acting on its behalf a paid-up, nonexclusive, irrevocable +# worldwide license in this data to reproduce, prepare derivative works, +# distribute copies to the public, perform publicly and display +# publicly, and to permit others to do so. +# +# NEITHER THE UNITED STATES GOVERNMENT, NOR THE UNITED STATES DEPARTMENT +# OF ENERGY, NOR SANDIA CORPORATION, NOR ANY OF THEIR EMPLOYEES, MAKES +# ANY WARRANTY, EXPRESS OR IMPLIED, OR ASSUMES ANY LEGAL LIABILITY OR +# RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR USEFULNESS OF ANY +# INFORMATION, APPARATUS, PRODUCT, OR PROCESS DISCLOSED, OR REPRESENTS +# THAT ITS USE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS. 
+# +# ************************************************************************ +# @HEADER + + +#----------------------------------------------------------------------------- +# Qthreads lightweight (user-level) threading library. +# +# Acquisition information: +# Date checked: July 2014 +# Checked by: H. Carter Edwards +# Source: https://code.google.com/p/qthreads +# + +TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES( QTHREAD + REQUIRED_HEADERS qthread.h + REQUIRED_LIBS_NAMES "qthread" + ) + diff --git a/lib/kokkos/cmake/tribits.cmake b/lib/kokkos/cmake/tribits.cmake new file mode 100644 index 0000000000..34cd216f81 --- /dev/null +++ b/lib/kokkos/cmake/tribits.cmake @@ -0,0 +1,485 @@ +INCLUDE(CMakeParseArguments) +INCLUDE(CTest) + +FUNCTION(ASSERT_DEFINED VARS) + FOREACH(VAR ${VARS}) + IF(NOT DEFINED ${VAR}) + MESSAGE(SEND_ERROR "Error, the variable ${VAR} is not defined!") + ENDIF() + ENDFOREACH() +ENDFUNCTION() + +MACRO(GLOBAL_SET VARNAME) + SET(${VARNAME} ${ARGN} CACHE INTERNAL "") +ENDMACRO() + +MACRO(PREPEND_GLOBAL_SET VARNAME) + ASSERT_DEFINED(${VARNAME}) + GLOBAL_SET(${VARNAME} ${ARGN} ${${VARNAME}}) +ENDMACRO() + +FUNCTION(REMOVE_GLOBAL_DUPLICATES VARNAME) + ASSERT_DEFINED(${VARNAME}) + IF (${VARNAME}) + SET(TMP ${${VARNAME}}) + LIST(REMOVE_DUPLICATES TMP) + GLOBAL_SET(${VARNAME} ${TMP}) + ENDIF() +ENDFUNCTION() + +MACRO(TRIBITS_ADD_OPTION_AND_DEFINE USER_OPTION_NAME MACRO_DEFINE_NAME DOCSTRING DEFAULT_VALUE) + MESSAGE(STATUS "TRIBITS_ADD_OPTION_AND_DEFINE: '${USER_OPTION_NAME}' '${MACRO_DEFINE_NAME}' '${DEFAULT_VALUE}'") + SET( ${USER_OPTION_NAME} "${DEFAULT_VALUE}" CACHE BOOL "${DOCSTRING}" ) + IF(NOT ${MACRO_DEFINE_NAME} STREQUAL "") + IF(${USER_OPTION_NAME}) + GLOBAL_SET(${MACRO_DEFINE_NAME} ON) + ELSE() + GLOBAL_SET(${MACRO_DEFINE_NAME} OFF) + ENDIF() + ENDIF() +ENDMACRO() + +FUNCTION(TRIBITS_CONFIGURE_FILE PACKAGE_NAME_CONFIG_FILE) + + # Configure the file + CONFIGURE_FILE( + ${PACKAGE_SOURCE_DIR}/cmake/${PACKAGE_NAME_CONFIG_FILE}.in
${CMAKE_CURRENT_BINARY_DIR}/${PACKAGE_NAME_CONFIG_FILE} + ) + +ENDFUNCTION() + +MACRO(TRIBITS_ADD_DEBUG_OPTION) + TRIBITS_ADD_OPTION_AND_DEFINE( + ${PROJECT_NAME}_ENABLE_DEBUG + HAVE_${PROJECT_NAME_UC}_DEBUG + "Enable a host of runtime debug checking." + OFF + ) +ENDMACRO() + + +MACRO(TRIBITS_ADD_TEST_DIRECTORIES) + FOREACH(TEST_DIR ${ARGN}) + ADD_SUBDIRECTORY(${TEST_DIR}) + ENDFOREACH() +ENDMACRO() + +MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES) + + IF(${PACKAGE_NAME}_ENABLE_EXAMPLES OR ${PARENT_PACKAGE_NAME}_ENABLE_EXAMPLES) + FOREACH(EXAMPLE_DIR ${ARGN}) + ADD_SUBDIRECTORY(${EXAMPLE_DIR}) + ENDFOREACH() + ENDIF() + +ENDMACRO() + +MACRO(TARGET_TRANSFER_PROPERTY TARGET_NAME PROP_IN PROP_OUT) + SET(PROP_VALUES) + FOREACH(TARGET_X ${ARGN}) + LIST(APPEND PROP_VALUES "$<TARGET_PROPERTY:${TARGET_X},${PROP_IN}>") + ENDFOREACH() + SET_TARGET_PROPERTIES(${TARGET_NAME} PROPERTIES ${PROP_OUT} "${PROP_VALUES}") +ENDMACRO() + +MACRO(ADD_INTERFACE_LIBRARY LIB_NAME) + FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") + ADD_LIBRARY(${LIB_NAME} STATIC ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp) + SET_TARGET_PROPERTIES(${LIB_NAME} PROPERTIES INTERFACE TRUE) +ENDMACRO() + +# Older versions of cmake do not make include directories transitive +MACRO(TARGET_LINK_AND_INCLUDE_LIBRARIES TARGET_NAME) + TARGET_LINK_LIBRARIES(${TARGET_NAME} LINK_PUBLIC ${ARGN}) + FOREACH(DEP_LIB ${ARGN}) + TARGET_INCLUDE_DIRECTORIES(${TARGET_NAME} PUBLIC $<TARGET_PROPERTY:${DEP_LIB},INTERFACE_INCLUDE_DIRECTORIES>) + TARGET_INCLUDE_DIRECTORIES(${TARGET_NAME} PUBLIC $<TARGET_PROPERTY:${DEP_LIB},INCLUDE_DIRECTORIES>) + ENDFOREACH() +ENDMACRO() + +FUNCTION(TRIBITS_ADD_LIBRARY LIBRARY_NAME) + + SET(options STATIC SHARED TESTONLY NO_INSTALL_LIB_OR_HEADERS CUDALIBRARY) + SET(oneValueArgs) + SET(multiValueArgs HEADERS HEADERS_INSTALL_SUBDIR NOINSTALLHEADERS SOURCES DEPLIBS IMPORTEDLIBS DEFINES ADDED_LIB_TARGET_NAME_OUT) + + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + IF(PARSE_HEADERS) + LIST(REMOVE_DUPLICATES PARSE_HEADERS) + ENDIF() + IF(PARSE_SOURCES) + LIST(REMOVE_DUPLICATES PARSE_SOURCES) + ENDIF() + + # Local
variable to hold all of the libraries that will be directly linked + # to this library. + SET(LINK_LIBS ${${PACKAGE_NAME}_DEPS}) + + # Add dependent libraries passed directly in + + IF (PARSE_IMPORTEDLIBS) + LIST(APPEND LINK_LIBS ${PARSE_IMPORTEDLIBS}) + ENDIF() + + IF (PARSE_DEPLIBS) + LIST(APPEND LINK_LIBS ${PARSE_DEPLIBS}) + ENDIF() + + # Add the library and all the dependencies + + IF (PARSE_DEFINES) + ADD_DEFINITIONS(${PARSE_DEFINES}) + ENDIF() + + IF (PARSE_STATIC) + SET(STATIC_KEYWORD "STATIC") + ELSE() + SET(STATIC_KEYWORD) + ENDIF() + + IF (PARSE_SHARED) + SET(SHARED_KEYWORD "SHARED") + ELSE() + SET(SHARED_KEYWORD) + ENDIF() + + IF (PARSE_TESTONLY) + SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL") + ELSE() + SET(EXCLUDE_FROM_ALL_KEYWORD) + ENDIF() + IF (NOT PARSE_CUDALIBRARY) + ADD_LIBRARY( + ${LIBRARY_NAME} + ${STATIC_KEYWORD} + ${SHARED_KEYWORD} + ${EXCLUDE_FROM_ALL_KEYWORD} + ${PARSE_HEADERS} + ${PARSE_NOINSTALLHEADERS} + ${PARSE_SOURCES} + ) + ELSE() + CUDA_ADD_LIBRARY( + ${LIBRARY_NAME} + ${PARSE_HEADERS} + ${PARSE_NOINSTALLHEADERS} + ${PARSE_SOURCES} + ) + ENDIF() + + TARGET_LINK_AND_INCLUDE_LIBRARIES(${LIBRARY_NAME} ${LINK_LIBS}) + + IF (NOT PARSE_TESTONLY OR PARSE_NO_INSTALL_LIB_OR_HEADERS) + + INSTALL( + TARGETS ${LIBRARY_NAME} + EXPORT ${PROJECT_NAME} + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + ARCHIVE DESTINATION lib + COMPONENT ${PACKAGE_NAME} + ) + + INSTALL( + FILES ${PARSE_HEADERS} + EXPORT ${PROJECT_NAME} + DESTINATION include + COMPONENT ${PACKAGE_NAME} + ) + + INSTALL( + DIRECTORY ${PARSE_HEADERS_INSTALL_SUBDIR} + EXPORT ${PROJECT_NAME} + DESTINATION include + COMPONENT ${PACKAGE_NAME} + ) + + ENDIF() + + IF (NOT PARSE_TESTONLY) + PREPEND_GLOBAL_SET(${PACKAGE_NAME}_LIBS ${LIBRARY_NAME}) + REMOVE_GLOBAL_DUPLICATES(${PACKAGE_NAME}_LIBS) + ENDIF() + +ENDFUNCTION() + +FUNCTION(TRIBITS_ADD_EXECUTABLE EXE_NAME) + + SET(options NOEXEPREFIX NOEXESUFFIX ADD_DIR_TO_NAME INSTALLABLE TESTONLY) + SET(oneValueArgs ADDED_EXE_TARGET_NAME_OUT) 
+ SET(multiValueArgs SOURCES CATEGORIES HOST XHOST HOSTTYPE XHOSTTYPE DIRECTORY TESTONLYLIBS IMPORTEDLIBS DEPLIBS COMM LINKER_LANGUAGE TARGET_DEFINES DEFINES) + + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + SET(LINK_LIBS PACKAGE_${PACKAGE_NAME}) + + IF (PARSE_TESTONLYLIBS) + LIST(APPEND LINK_LIBS ${PARSE_TESTONLYLIBS}) + ENDIF() + + IF (PARSE_IMPORTEDLIBS) + LIST(APPEND LINK_LIBS ${PARSE_IMPORTEDLIBS}) + ENDIF() + + SET (EXE_SOURCES) + IF(PARSE_DIRECTORY) + FOREACH( SOURCE_FILE ${PARSE_SOURCES} ) + IF(IS_ABSOLUTE ${SOURCE_FILE}) + SET (EXE_SOURCES ${EXE_SOURCES} ${SOURCE_FILE}) + ELSE() + SET (EXE_SOURCES ${EXE_SOURCES} ${PARSE_DIRECTORY}/${SOURCE_FILE}) + ENDIF() + ENDFOREACH( ) + ELSE() + FOREACH( SOURCE_FILE ${PARSE_SOURCES} ) + SET (EXE_SOURCES ${EXE_SOURCES} ${SOURCE_FILE}) + ENDFOREACH( ) + ENDIF() + + SET(EXE_BINARY_NAME ${EXE_NAME}) + IF(DEFINED PACKAGE_NAME AND NOT PARSE_NOEXEPREFIX) + SET(EXE_BINARY_NAME ${PACKAGE_NAME}_${EXE_BINARY_NAME}) + ENDIF() + + IF (PARSE_TESTONLY) + SET(EXCLUDE_FROM_ALL_KEYWORD "EXCLUDE_FROM_ALL") + ELSE() + SET(EXCLUDE_FROM_ALL_KEYWORD) + ENDIF() + ADD_EXECUTABLE(${EXE_BINARY_NAME} ${EXCLUDE_FROM_ALL_KEYWORD} ${EXE_SOURCES}) + + # Compile definitions can only be attached once the (possibly prefixed) target exists. + IF (PARSE_TARGET_DEFINES) + TARGET_COMPILE_DEFINITIONS(${EXE_BINARY_NAME} PUBLIC ${PARSE_TARGET_DEFINES}) + ENDIF() + TARGET_LINK_AND_INCLUDE_LIBRARIES(${EXE_BINARY_NAME} ${LINK_LIBS}) + + IF(PARSE_ADDED_EXE_TARGET_NAME_OUT) + SET(${PARSE_ADDED_EXE_TARGET_NAME_OUT} ${EXE_BINARY_NAME} PARENT_SCOPE) + ENDIF() + + IF(PARSE_INSTALLABLE) + INSTALL( + TARGETS ${EXE_BINARY_NAME} + EXPORT ${PROJECT_NAME} + DESTINATION bin + ) + ENDIF() +ENDFUNCTION() + +ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR}) + +FUNCTION(TRIBITS_ADD_EXECUTABLE_AND_TEST EXE_NAME) + + SET(options STANDARD_PASS_OUTPUT WILL_FAIL) + SET(oneValueArgs PASS_REGULAR_EXPRESSION FAIL_REGULAR_EXPRESSION ENVIRONMENT TIMEOUT CATEGORIES ADDED_TESTS_NAMES_OUT
ADDED_EXE_TARGET_NAME_OUT) + SET(multiValueArgs) + + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + TRIBITS_ADD_EXECUTABLE(${EXE_NAME} TESTONLY ADDED_EXE_TARGET_NAME_OUT TEST_NAME ${PARSE_UNPARSED_ARGUMENTS}) + + IF(WIN32) + ADD_TEST(NAME ${TEST_NAME} WORKING_DIRECTORY ${LIBRARY_OUTPUT_PATH} COMMAND ${TEST_NAME}${CMAKE_EXECUTABLE_SUFFIX}) + ELSE() + ADD_TEST(NAME ${TEST_NAME} COMMAND ${TEST_NAME}) + ENDIF() + ADD_DEPENDENCIES(check ${TEST_NAME}) + + IF(PARSE_FAIL_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES FAIL_REGULAR_EXPRESSION ${PARSE_FAIL_REGULAR_EXPRESSION}) + ENDIF() + + IF(PARSE_PASS_REGULAR_EXPRESSION) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES PASS_REGULAR_EXPRESSION ${PARSE_PASS_REGULAR_EXPRESSION}) + ENDIF() + + IF(PARSE_WILL_FAIL) + SET_TESTS_PROPERTIES(${TEST_NAME} PROPERTIES WILL_FAIL ${PARSE_WILL_FAIL}) + ENDIF() + + IF(PARSE_ADDED_TESTS_NAMES_OUT) + SET(${PARSE_ADDED_TESTS_NAMES_OUT} ${TEST_NAME} PARENT_SCOPE) + ENDIF() + + IF(PARSE_ADDED_EXE_TARGET_NAME_OUT) + SET(${PARSE_ADDED_EXE_TARGET_NAME_OUT} ${TEST_NAME} PARENT_SCOPE) + ENDIF() + +ENDFUNCTION() + +MACRO(TIBITS_CREATE_IMPORTED_TPL_LIBRARY TPL_NAME) + ADD_INTERFACE_LIBRARY(TPL_LIB_${TPL_NAME}) + TARGET_LINK_LIBRARIES(TPL_LIB_${TPL_NAME} LINK_PUBLIC ${TPL_${TPL_NAME}_LIBRARIES}) + TARGET_INCLUDE_DIRECTORIES(TPL_LIB_${TPL_NAME} INTERFACE ${TPL_${TPL_NAME}_INCLUDE_DIRS}) +ENDMACRO() + +FUNCTION(TRIBITS_TPL_FIND_INCLUDE_DIRS_AND_LIBRARIES TPL_NAME) + + SET(options MUST_FIND_ALL_LIBS MUST_FIND_ALL_HEADERS NO_PRINT_ENABLE_SUCCESS_FAIL) + SET(oneValueArgs) + SET(multiValueArgs REQUIRED_HEADERS REQUIRED_LIBS_NAMES) + + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + SET(_${TPL_NAME}_ENABLE_SUCCESS TRUE) + IF (PARSE_REQUIRED_LIBS_NAMES) + FIND_LIBRARY(TPL_${TPL_NAME}_LIBRARIES NAMES ${PARSE_REQUIRED_LIBS_NAMES}) + IF(NOT TPL_${TPL_NAME}_LIBRARIES) + SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) 
+ ENDIF() + ENDIF() + IF (PARSE_REQUIRED_HEADERS) + FIND_PATH(TPL_${TPL_NAME}_INCLUDE_DIRS NAMES ${PARSE_REQUIRED_HEADERS}) + IF(NOT TPL_${TPL_NAME}_INCLUDE_DIRS) + SET(_${TPL_NAME}_ENABLE_SUCCESS FALSE) + ENDIF() + ENDIF() + + + IF (_${TPL_NAME}_ENABLE_SUCCESS) + TIBITS_CREATE_IMPORTED_TPL_LIBRARY(${TPL_NAME}) + ENDIF() + +ENDFUNCTION() + +MACRO(TRIBITS_PROCESS_TPL_DEP_FILE TPL_FILE) + GET_FILENAME_COMPONENT(TPL_NAME ${TPL_FILE} NAME_WE) + INCLUDE("${TPL_FILE}") + IF(TARGET TPL_LIB_${TPL_NAME}) + MESSAGE(STATUS "Found tpl library: ${TPL_NAME}") + SET(TPL_ENABLE_${TPL_NAME} TRUE) + ELSE() + MESSAGE(STATUS "Tpl library not found: ${TPL_NAME}") + SET(TPL_ENABLE_${TPL_NAME} FALSE) + ENDIF() +ENDMACRO() + +MACRO(PREPEND_TARGET_SET VARNAME TARGET_NAME TYPE) + IF("${TYPE}" STREQUAL "REQUIRED") # macro parameters are not variables; expand TYPE explicitly + SET(REQUIRED TRUE) + ELSE() + SET(REQUIRED FALSE) + ENDIF() + IF(TARGET ${TARGET_NAME}) + PREPEND_GLOBAL_SET(${VARNAME} ${TARGET_NAME}) + ELSE() + IF(REQUIRED) + MESSAGE(FATAL_ERROR "Missing dependency ${TARGET_NAME}") + ENDIF() + ENDIF() +ENDMACRO() + +MACRO(TRIBITS_APPEND_PACKAGE_DEPS DEP_LIST TYPE) + FOREACH(DEP ${ARGN}) + PREPEND_GLOBAL_SET(${DEP_LIST} PACKAGE_${DEP}) + ENDFOREACH() +ENDMACRO() + +MACRO(TRIBITS_APPEND_TPLS_DEPS DEP_LIST TYPE) + FOREACH(DEP ${ARGN}) + PREPEND_TARGET_SET(${DEP_LIST} TPL_LIB_${DEP} ${TYPE}) + ENDFOREACH() +ENDMACRO() + +MACRO(TRIBITS_ENABLE_TPLS) + FOREACH(TPL ${ARGN}) + IF(TARGET ${TPL}) # NOTE(review): TPL targets are created as TPL_LIB_<name>; confirm the bare-name check is intended + GLOBAL_SET(${PACKAGE_NAME}_ENABLE_${TPL} TRUE) + ELSE() + GLOBAL_SET(${PACKAGE_NAME}_ENABLE_${TPL} FALSE) + ENDIF() + ENDFOREACH() +ENDMACRO() + +MACRO(TRIBITS_PACKAGE_DEFINE_DEPENDENCIES) + + SET(options) + SET(oneValueArgs) + SET(multiValueArgs + LIB_REQUIRED_PACKAGES + LIB_OPTIONAL_PACKAGES + TEST_REQUIRED_PACKAGES + TEST_OPTIONAL_PACKAGES + LIB_REQUIRED_TPLS + LIB_OPTIONAL_TPLS + TEST_REQUIRED_TPLS + TEST_OPTIONAL_TPLS + REGRESSION_EMAIL_LIST + SUBPACKAGES_DIRS_CLASSIFICATIONS_OPTREQS + ) + CMAKE_PARSE_ARGUMENTS(PARSE "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN}) + + GLOBAL_SET(${PACKAGE_NAME}_DEPS "") + TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_DEPS REQUIRED ${PARSE_LIB_REQUIRED_PACKAGES}) + TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_DEPS OPTIONAL ${PARSE_LIB_OPTIONAL_PACKAGES}) + TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_DEPS REQUIRED ${PARSE_LIB_REQUIRED_TPLS}) + TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_DEPS OPTIONAL ${PARSE_LIB_OPTIONAL_TPLS}) + + GLOBAL_SET(${PACKAGE_NAME}_TEST_DEPS "") + TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_TEST_DEPS REQUIRED ${PARSE_TEST_REQUIRED_PACKAGES}) + TRIBITS_APPEND_PACKAGE_DEPS(${PACKAGE_NAME}_TEST_DEPS OPTIONAL ${PARSE_TEST_OPTIONAL_PACKAGES}) + TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_TEST_DEPS REQUIRED ${PARSE_TEST_REQUIRED_TPLS}) + TRIBITS_APPEND_TPLS_DEPS(${PACKAGE_NAME}_TEST_DEPS OPTIONAL ${PARSE_TEST_OPTIONAL_TPLS}) + + TRIBITS_ENABLE_TPLS(${PARSE_LIB_REQUIRED_TPLS} ${PARSE_LIB_OPTIONAL_TPLS} ${PARSE_TEST_REQUIRED_TPLS} ${PARSE_TEST_OPTIONAL_TPLS}) + +ENDMACRO() + +MACRO(TRIBITS_SUBPACKAGE NAME) + SET(PACKAGE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + SET(PARENT_PACKAGE_NAME ${PACKAGE_NAME}) + SET(PACKAGE_NAME ${PACKAGE_NAME}${NAME}) + STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) + + ADD_INTERFACE_LIBRARY(PACKAGE_${PACKAGE_NAME}) + + GLOBAL_SET(${PACKAGE_NAME}_LIBS "") + + INCLUDE(${PACKAGE_SOURCE_DIR}/cmake/Dependencies.cmake) + +ENDMACRO(TRIBITS_SUBPACKAGE) + +MACRO(TRIBITS_SUBPACKAGE_POSTPROCESS) + TARGET_LINK_AND_INCLUDE_LIBRARIES(PACKAGE_${PACKAGE_NAME} ${${PACKAGE_NAME}_LIBS}) +ENDMACRO(TRIBITS_SUBPACKAGE_POSTPROCESS) + +MACRO(TRIBITS_PACKAGE_DECL NAME) + + PROJECT(${NAME}) + STRING(TOUPPER ${PROJECT_NAME} PROJECT_NAME_UC) + SET(PACKAGE_NAME ${PROJECT_NAME}) + STRING(TOUPPER ${PACKAGE_NAME} PACKAGE_NAME_UC) + + SET(TRIBITS_DEPS_DIR "${CMAKE_SOURCE_DIR}/cmake/deps") + FILE(GLOB TPLS_FILES "${TRIBITS_DEPS_DIR}/*.cmake") + FOREACH(TPL_FILE ${TPLS_FILES}) + TRIBITS_PROCESS_TPL_DEP_FILE(${TPL_FILE}) + ENDFOREACH() + +ENDMACRO() + + 
+MACRO(TRIBITS_PROCESS_SUBPACKAGES) + FILE(GLOB SUBPACKAGES RELATIVE ${CMAKE_SOURCE_DIR} */cmake/Dependencies.cmake) + FOREACH(SUBPACKAGE ${SUBPACKAGES}) + GET_FILENAME_COMPONENT(SUBPACKAGE_CMAKE ${SUBPACKAGE} DIRECTORY) + GET_FILENAME_COMPONENT(SUBPACKAGE_DIR ${SUBPACKAGE_CMAKE} DIRECTORY) + ADD_SUBDIRECTORY(${SUBPACKAGE_DIR}) + ENDFOREACH() +ENDMACRO(TRIBITS_PROCESS_SUBPACKAGES) + +MACRO(TRIBITS_PACKAGE_DEF) +ENDMACRO(TRIBITS_PACKAGE_DEF) + +MACRO(TRIBITS_EXCLUDE_AUTOTOOLS_FILES) +ENDMACRO(TRIBITS_EXCLUDE_AUTOTOOLS_FILES) + +MACRO(TRIBITS_EXCLUDE_FILES) +ENDMACRO(TRIBITS_EXCLUDE_FILES) + +MACRO(TRIBITS_PACKAGE_POSTPROCESS) +ENDMACRO(TRIBITS_PACKAGE_POSTPROCESS) + diff --git a/lib/kokkos/config/configure_compton_cpu.sh b/lib/kokkos/config/configure_compton_cpu.sh new file mode 100755 index 0000000000..17287fb848 --- /dev/null +++ b/lib/kokkos/config/configure_compton_cpu.sh @@ -0,0 +1,190 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. +# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/host/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. 
+CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +CUDA_ARCH="" +# CUDA_ARCH="20" +# CUDA_ARCH="30" +# CUDA_ARCH="35" + +# Build with Intel compiler + +INTEL=ON + +# Build for MIC architecture: + +# INTEL_XEON_PHI=ON + +# Build with HWLOC at location: + +HWLOC_BASE_DIR="/home/projects/libraries/host/hwloc/1.6.2" + +# Location for MPI to use in examples: + +MPI_BASE_DIR="" + +#----------------------------------------------------------------------------- +# MPI configuration only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Pthread configuration: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP configuration: + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D
Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. + + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" 
] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. 
+ CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_compton_mic.sh b/lib/kokkos/config/configure_compton_mic.sh new file mode 100755 index 0000000000..7f9aee13f9 --- /dev/null +++ b/lib/kokkos/config/configure_compton_mic.sh @@ -0,0 +1,186 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. +# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/mic/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. 
+ +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +CUDA_ARCH="" +# CUDA_ARCH="20" +# CUDA_ARCH="30" +# CUDA_ARCH="35" + +# Build for MIC architecture: + +INTEL_XEON_PHI=ON + +# Build with HWLOC at location: + +HWLOC_BASE_DIR="/home/projects/libraries/mic/hwloc/1.6.2" + +# Location for MPI to use in examples: + +MPI_BASE_DIR="" + +#----------------------------------------------------------------------------- +# MPI configuration only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Pthread configuration: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP configuration: + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE}
-D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. + + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. 
+ CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} $*" +# Forward extra script arguments to CMake, as the header comment promises. +cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} "$@" + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_kokkos.sh b/lib/kokkos/config/configure_kokkos.sh new file mode 100755 index 0000000000..592e7f5936 --- /dev/null +++ b/lib/kokkos/config/configure_kokkos.sh @@ -0,0 +1,293 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler.
+ +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +#----------------------------------------------------------------------------- + +USE_CUDA_ARCH= +USE_THREAD= +USE_OPENMP= +USE_INTEL= +USE_XEON_PHI= +HWLOC_BASE_DIR= +MPI_BASE_DIR= +BLAS_LIB_DIR= +LAPACK_LIB_DIR= +# Select the platform by placing '1' in one test below: '[ 1 ]' is true, '[ ]' is false. +if [ 1 ] ; then + # Platform 'kokkos-dev' with Cuda, OpenMP, hwloc, mpi, gnu + USE_CUDA_ARCH="35" + USE_OPENMP=ON + HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7" + MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7" + BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib" + LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib" + +elif [ ] ; then + # Platform 'kokkos-dev' with Cuda, Threads, hwloc, mpi, gnu + USE_CUDA_ARCH="35" + USE_THREAD=ON + HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7" + MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7" + BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib" + LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib" + +elif [ ] ; then + # Platform 'kokkos-dev' with Xeon Phi and hwloc + USE_OPENMP=ON + USE_INTEL=ON + USE_XEON_PHI=ON + HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/mic/intel/13.SP1.1.106" + +elif [ ] ; then + # Platform 'kokkos-nvidia' with Cuda, OpenMP, hwloc, mpi, gnu + USE_CUDA_ARCH="20" + USE_OPENMP=ON + HWLOC_BASE_DIR="/home/sems/common/hwloc/current" + MPI_BASE_DIR="/home/sems/common/openmpi/current" + +elif [ ] ; then + # Platform 'kokkos-nvidia' with Cuda, Threads, hwloc, mpi, gnu + USE_CUDA_ARCH="20" + USE_THREAD=ON + HWLOC_BASE_DIR="/home/sems/common/hwloc/current" + MPI_BASE_DIR="/home/sems/common/openmpi/current" + +fi + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure command line options: + +CMAKE_CONFIGURE="" +CMAKE_CXX_FLAGS="" + +#-----------------------------------------------------------------------------
+# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- + +if [ 1 ] ; then + + # Configure for Tpetra/Kokkos: + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${BLAS_LIB_DIR}" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_DIRS:FILEPATH=${LAPACK_LIB_DIR}" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Tpetra:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Kokkos:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraClassic:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Tpetra_ENABLE_Kokkos_Refactor:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D KokkosClassic_DefaultNode:STRING=Kokkos::Compat::KokkosOpenMPWrapperNode" + + CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}-DKOKKOS_FAST_COMPILE" + + if [ -n "${USE_CUDA_ARCH}" ] ; then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Cuda:BOOL=ON" + + fi + +fi + +if [ 1 ] ; then + + # Configure for Stokhos: + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Sacado:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Stokhos:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Stokhos_ENABLE_Belos:BOOL=ON" + +fi + +if [ 1 ] ; then + + # Configure for 
TrilinosCouplings: + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TrilinosCouplings:BOOL=ON" + +fi + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON" + +if [ "${CMAKE_BUILD_TYPE}" == "DEBUG" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" +fi + +#----------------------------------------------------------------------------- +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# MPI configuation only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Kokkos use pthread configuation: + +if [ "${USE_THREAD}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Kokkos use OpenMP configuation: + +if [ "${USE_OPENMP}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
Kokkos_ENABLE_OpenMP:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Hardware locality configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${USE_CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. + + CUDA_NVCC_FLAGS="-DKOKKOS_HAVE_CUDA_ARCH=${USE_CUDA_ARCH}0;-gencode;arch=compute_${USE_CUDA_ARCH},code=sm_${USE_CUDA_ARCH}" + + if [ "${USE_OPENMP}" = "ON" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + fi + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${USE_INTEL}" = "ON" -o "${USE_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +# Cross-compile for Intel Xeon Phi: + +if [ "${USE_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -mmic" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- + +if [ -n "${CMAKE_CXX_FLAGS}" ] ; then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING='${CMAKE_CXX_FLAGS}'" + +fi + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. 
+# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo "cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}" + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_kokkos_bgq.sh b/lib/kokkos/config/configure_kokkos_bgq.sh new file mode 100755 index 0000000000..73236937ea --- /dev/null +++ b/lib/kokkos/config/configure_kokkos_bgq.sh @@ -0,0 +1,88 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# to build: +# build on bgq-b[1-12] +# module load sierra-devel +# run this configure file +# make + +# to run: +# ssh bgq-login +# cd /scratch/username/... +# export OMP_PROC_BIND and XLSMPOPTS environment variables +# run with srun + +# Note: hwloc does not work to get or set cpubindings on bgq. +# Use the openmp backend and the openmp environment variables. +# +# Only the mpi wrappers seem to be setup for cross-compile, +# so it is important that this configure enables MPI and uses mpigcc wrappers. + + + +# +# Force CMake to re-evaluate build options. +# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="../Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=../TrilinosInstall/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. 
+ +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=mpigcc-4.7.2" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=mpig++-4.7.2" + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} $*" +# Forward extra script arguments to CMake, as the header comment promises. +cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} "$@" + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_kokkos_dev.sh b/lib/kokkos/config/configure_kokkos_dev.sh new file mode 100755 index 0000000000..ac61dec602 --- /dev/null +++ b/lib/kokkos/config/configure_kokkos_dev.sh @@ -0,0 +1,216 @@ +#!/bin/sh +# +# Copy this script, put it outside the
Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. +# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${HOME}/TrilinosInstall/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +#CMAKE_BUILD_TYPE=DEBUG +#CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +#CUDA_ARCH="" +#CUDA_ARCH="20" +#CUDA_ARCH="30" +CUDA_ARCH="35" + +# Build with OpenMP and Pthreads: + +OPENMP=ON +PTHREADS=ON + +# Build host code with Intel compiler: + +INTEL=OFF + +# Build for MIC architecture: + +INTEL_XEON_PHI=OFF + +# Build with HWLOC at location: + +#HWLOC_BASE_DIR="" +#HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7" +HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" + +# Location for MPI to use in examples: + +#MPI_BASE_DIR="" +#MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7" +MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.7.3" +#MPI_BASE_DIR="/home/projects/openmpi/1.7.3/llvm/2013-12-02/" + +#----------------------------------------------------------------------------- +# MPI configuration only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path
can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Pthread configuration: + +if [ "${PTHREADS}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# OpenMP configuration: + +if [ "${OPENMP}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D
HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semi-colon delimited, + # this is different than the standard CMAKE_CXX_FLAGS syntax. + + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + + if [ "${OPENMP}" = "ON" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + fi + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} $*" +# Forward extra script arguments to CMake, as the header comment promises. +cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR} "$@" + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_kokkos_nvidia.sh b/lib/kokkos/config/configure_kokkos_nvidia.sh new file mode 100755 index 0000000000..f78b7dce78 --- /dev/null +++ b/lib/kokkos/config/configure_kokkos_nvidia.sh @@ -0,0 +1,204 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory,
and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. +# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/sems/common/kokkos/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +# CUDA_ARCH="" +CUDA_ARCH="20" +# CUDA_ARCH="30" +# CUDA_ARCH="35" + +# Build with OpenMP + +OPENMP=ON + +# Build host code with Intel compiler: + +# INTEL=ON + +# Build for MIC architecture: + +# INTEL_XEON_PHI=ON + +# Build with HWLOC at location: + +HWLOC_BASE_DIR="/home/sems/common/hwloc/current" + +# Location for MPI to use in examples: + +MPI_BASE_DIR="/home/sems/common/openmpi/current" + +#----------------------------------------------------------------------------- +# MPI configuation only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + 
+#----------------------------------------------------------------------------- +# Pthread configuration: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP configuration: + +if [ "${OPENMP}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semicolon delimited, + # this is different from the standard CMAKE_CXX_FLAGS syntax. 
+ + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + + if [ "${OPENMP}" = "ON" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + fi + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} $* ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_shannon.sh b/lib/kokkos/config/configure_shannon.sh new file mode 100755 index 0000000000..8bd175b031 --- /dev/null +++ b/lib/kokkos/config/configure_shannon.sh @@ -0,0 +1,190 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +# Additional command-line arguments given to this script will be +# passed directly to CMake. +# + +# +# Force CMake to re-evaluate build options. 
+# +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile* + +#----------------------------------------------------------------------------- +# Incrementally construct cmake configure options: + +CMAKE_CONFIGURE="" + +#----------------------------------------------------------------------------- +# Location of Trilinos source tree: + +CMAKE_PROJECT_DIR="${HOME}/Trilinos" + +# Location for installation: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/`date +%F`" + +#----------------------------------------------------------------------------- +# General build options. +# Use a variable so options can be propagated to CUDA compiler. + +CMAKE_VERBOSE_MAKEFILE=OFF +CMAKE_BUILD_TYPE=RELEASE +# CMAKE_BUILD_TYPE=DEBUG + +#----------------------------------------------------------------------------- +# Build for CUDA architecture: + +# CUDA_ARCH="" +# CUDA_ARCH="20" +# CUDA_ARCH="30" +CUDA_ARCH="35" + +# Build host code with Intel compiler: + +INTEL=ON + +# Build for MIC architecture: + +# INTEL_XEON_PHI=ON + +# Build with HWLOC at location: + +HWLOC_BASE_DIR="/home/projects/hwloc/1.6.2" + +# Location for MPI to use in examples: + +MPI_BASE_DIR="" + +#----------------------------------------------------------------------------- +# MPI configuration only used for examples: +# +# Must have the MPI_BASE_DIR so that the +# include path can be passed to the Cuda compiler + +if [ -n "${MPI_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}" +else + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF" +fi + +#----------------------------------------------------------------------------- +# Pthread configuration: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" + 
+#----------------------------------------------------------------------------- +# OpenMP configuration: + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Configure packages for kokkos-only: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +#----------------------------------------------------------------------------- +# Hardware locality cmake configuration: + +if [ -n "${HWLOC_BASE_DIR}" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" +fi + +#----------------------------------------------------------------------------- +# Cuda cmake configuration: + +if [ -n "${CUDA_ARCH}" ] ; +then + + # Options to CUDA_NVCC_FLAGS must be semicolon delimited, + # this is different from the standard CMAKE_CXX_FLAGS syntax. 
+ + CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}" + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi" + + if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ; + then + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g" + else + CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3" + fi + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}" + +fi + +#----------------------------------------------------------------------------- + +if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ; +then + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" +fi + +#----------------------------------------------------------------------------- + +# Cross-compile for Intel Xeon Phi: + +if [ "${INTEL_XEON_PHI}" = "ON" ] ; +then + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600" + + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''" + 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread" + + # Cannot cross-compile fortran compatibility checks on the MIC: + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" + + # Tell cmake the answers to compile-and-execute tests + # to prevent cmake from executing a cross-compiled program. + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0" + CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0" + +fi + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}" + +#----------------------------------------------------------------------------- + +echo "cmake ${CMAKE_CONFIGURE} $* ${CMAKE_PROJECT_DIR}" + +cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh b/lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh new file mode 100755 index 0000000000..0baa83aefe --- /dev/null +++ b/lib/kokkos/config/configure_tpetra_kokkos_cuda_nvcc_wrapper.sh @@ -0,0 +1,140 @@ +#!/bin/bash +# +# This script uses CUDA, OpenMP, and MPI. +# +# Before invoking this script, set the OMPI_CXX environment variable +# to point to nvcc_wrapper, wherever it happens to live. (If you use +# an MPI implementation other than OpenMPI, set the corresponding +# environment variable instead.) 
+# + +rm -f CMakeCache.txt; +rm -rf CMakeFiles +EXTRA_ARGS=$@ +MPI_PATH="/opt/mpi/openmpi/1.8.2/nvcc-gcc/4.8.3-6.5" +CUDA_PATH="/opt/nvidia/cuda/6.5.14" + +# +# As long as there are any .cu files in Trilinos, we'll need to set +# CUDA_NVCC_FLAGS. If Trilinos gets rid of all of its .cu files and +# lets nvcc_wrapper handle them as .cpp files, then we won't need to +# set CUDA_NVCC_FLAGS. As it is, given that we need to set +# CUDA_NVCC_FLAGS, we must make sure that they are the same flags as +# nvcc_wrapper passes to nvcc. +# +CUDA_NVCC_FLAGS="-gencode;arch=compute_35,code=sm_35;-I${MPI_PATH}/include" +CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp" +CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3;-DKOKKOS_USE_CUDA_UVM" + +cmake \ + -D CMAKE_INSTALL_PREFIX:PATH="$PWD/../install/" \ + -D CMAKE_BUILD_TYPE:STRING=DEBUG \ + -D CMAKE_CXX_FLAGS:STRING="-g -Wall" \ + -D CMAKE_C_FLAGS:STRING="-g -Wall" \ + -D CMAKE_FORTRAN_FLAGS:STRING="" \ + -D CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS="" \ + -D Trilinos_ENABLE_Triutils=OFF \ + -D Trilinos_ENABLE_INSTALL_CMAKE_CONFIG_FILES:BOOL=OFF \ + -D Trilinos_ENABLE_DEBUG:BOOL=OFF \ + -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF \ + -D Trilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=OFF \ + -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING="" \ + -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF \ + -D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES:BOOL=OFF \ + -D BUILD_SHARED_LIBS:BOOL=OFF \ + -D DART_TESTING_TIMEOUT:STRING=600 \ + -D CMAKE_VERBOSE_MAKEFILE:BOOL=OFF \ + \ + \ + -D CMAKE_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \ + -D CMAKE_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \ + -D MPI_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \ + -D MPI_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \ + -D CMAKE_Fortran_COMPILER:FILEPATH="${MPI_PATH}/bin/mpif77" \ + -D MPI_EXEC:FILEPATH="${MPI_PATH}/bin/mpirun" \ + -D 
MPI_EXEC_POST_NUMPROCS_FLAGS:STRING="-bind-to;socket;--map-by;socket;env;CUDA_MANAGED_FORCE_DEVICE_ALLOC=1;CUDA_LAUNCH_BLOCKING=1;OMP_NUM_THREADS=2" \ + \ + \ + -D Trilinos_ENABLE_CXX11:BOOL=OFF \ + -D TPL_ENABLE_MPI:BOOL=ON \ + -D Trilinos_ENABLE_OpenMP:BOOL=ON \ + -D Trilinos_ENABLE_ThreadPool:BOOL=ON \ + \ + \ + -D TPL_ENABLE_CUDA:BOOL=ON \ + -D CUDA_TOOLKIT_ROOT_DIR:FILEPATH="${CUDA_PATH}" \ + -D CUDA_PROPAGATE_HOST_FLAGS:BOOL=OFF \ + -D TPL_ENABLE_Thrust:BOOL=OFF \ + -D Thrust_INCLUDE_DIRS:FILEPATH="${CUDA_PATH}/include" \ + -D TPL_ENABLE_CUSPARSE:BOOL=OFF \ + -D TPL_ENABLE_Cusp:BOOL=OFF \ + -D Cusp_INCLUDE_DIRS="/home/crtrott/Software/cusp" \ + -D CUDA_VERBOSE_BUILD:BOOL=OFF \ + -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS} \ + \ + \ + -D TPL_ENABLE_HWLOC=OFF \ + -D HWLOC_INCLUDE_DIRS="/usr/local/software/hwloc/current/include" \ + -D HWLOC_LIBRARY_DIRS="/usr/local/software/hwloc/current/lib" \ + -D TPL_ENABLE_BinUtils=OFF \ + -D TPL_ENABLE_BLAS:STRING=ON \ + -D TPL_ENABLE_LAPACK:STRING=ON \ + -D TPL_ENABLE_MKL:STRING=OFF \ + -D TPL_ENABLE_HWLOC:STRING=OFF \ + -D TPL_ENABLE_GTEST:STRING=ON \ + -D TPL_ENABLE_SuperLU=ON \ + -D TPL_ENABLE_BLAS=ON \ + -D TPL_ENABLE_LAPACK=ON \ + -D TPL_SuperLU_LIBRARIES="/home/crtrott/Software/SuperLU_4.3/lib/libsuperlu_4.3.a" \ + -D TPL_SuperLU_INCLUDE_DIRS="/home/crtrott/Software/SuperLU_4.3/SRC" \ + \ + \ + -D Trilinos_Enable_Kokkos:BOOL=ON \ + -D Trilinos_ENABLE_KokkosCore:BOOL=ON \ + -D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON \ + -D Trilinos_ENABLE_KokkosContainers:BOOL=ON \ + -D Trilinos_ENABLE_TpetraKernels:BOOL=ON \ + -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON \ + -D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON \ + -D Trilinos_ENABLE_KokkosExample:BOOL=ON \ + -D Kokkos_ENABLE_EXAMPLES:BOOL=ON \ + -D Kokkos_ENABLE_TESTS:BOOL=OFF \ + -D KokkosClassic_DefaultNode:STRING="Kokkos::Compat::KokkosCudaWrapperNode" \ + -D TpetraClassic_ENABLE_OpenMPNode=OFF \ + -D TpetraClassic_ENABLE_TPINode=OFF \ + -D 
TpetraClassic_ENABLE_MKL=OFF \ + -D Kokkos_ENABLE_Cuda_UVM=ON \ + \ + \ + -D Trilinos_ENABLE_Teuchos:BOOL=ON \ + -D Teuchos_ENABLE_COMPLEX:BOOL=OFF \ + \ + \ + -D Trilinos_ENABLE_Tpetra:BOOL=ON \ + -D Tpetra_ENABLE_KokkosCore=ON \ + -D Tpetra_ENABLE_Kokkos_DistObject=OFF \ + -D Tpetra_ENABLE_Kokkos_Refactor=ON \ + -D Tpetra_ENABLE_TESTS=ON \ + -D Tpetra_ENABLE_EXAMPLES=ON \ + -D Tpetra_ENABLE_MPI_CUDA_RDMA:BOOL=ON \ + \ + \ + -D Trilinos_ENABLE_Belos=OFF \ + -D Trilinos_ENABLE_Amesos=OFF \ + -D Trilinos_ENABLE_Amesos2=OFF \ + -D Trilinos_ENABLE_Ifpack=OFF \ + -D Trilinos_ENABLE_Ifpack2=OFF \ + -D Trilinos_ENABLE_Epetra=OFF \ + -D Trilinos_ENABLE_EpetraExt=OFF \ + -D Trilinos_ENABLE_Zoltan=OFF \ + -D Trilinos_ENABLE_Zoltan2=OFF \ + -D Trilinos_ENABLE_MueLu=OFF \ + -D Belos_ENABLE_TESTS=ON \ + -D Belos_ENABLE_EXAMPLES=ON \ + -D MueLu_ENABLE_TESTS=ON \ + -D MueLu_ENABLE_EXAMPLES=ON \ + -D Ifpack2_ENABLE_TESTS=ON \ + -D Ifpack2_ENABLE_EXAMPLES=ON \ + $EXTRA_ARGS \ +${HOME}/Trilinos + diff --git a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt new file mode 100644 index 0000000000..9f56f2fd48 --- /dev/null +++ b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt @@ -0,0 +1,153 @@ +// -------------------------------------------------------------------------------- // + +The following steps are for workstations/servers with the SEMS environment installed. + +// -------------------------------------------------------------------------------- // +Summary: + +- Step 1: Rigorous testing of Kokkos' develop branch for each backend (Serial, OpenMP, Threads, Cuda) with all supported compilers. + +- Step 2: Snapshot Kokkos' develop branch into current Trilinos develop branch. + +- Step 3: Build and test Trilinos with combinations of compilers, types, backends. 
+ +- Step 4: Promote Kokkos develop branch to master if the snapshot does not cause any new tests to fail; else track/fix causes of new failures. + +- Step 5: Snapshot Kokkos tagged master branch into Trilinos and push Trilinos. +// -------------------------------------------------------------------------------- // + + +// -------------------------------------------------------------------------------- // + +Step 1: + 1.1. Update kokkos develop branch (NOT a fork) + + (From kokkos directory): + git fetch --all + git checkout develop + git reset --hard origin/develop + + 1.2. Create a testing directory - here the directory is created within the kokkos directory + + mkdir testing + cd testing + + 1.3. Run the test_all_sandia script; various compiler and build-list options can be specified + + ../config/test_all_sandia + + 1.4. Clean repository of untracked files + + cd ../ + git clean -df + +// -------------------------------------------------------------------------------- // + +Step 2: + 2.1. Update Trilinos develop branch + + (From Trilinos directory): + git checkout develop + git fetch --all + git reset --hard origin/develop + git clean -df + + 2.2. Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files + + module load python/2.7.9 + python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages + +// -------------------------------------------------------------------------------- // + +Step 3: + 3.1. 
Build and test Trilinos with 3 different configurations; a configure-all script is provided in Trilinos and should be modified to test each of the following 3 configurations with appropriate environment variable(s): + + - GCC/4.7.2-OpenMP/Complex + Run tests with the following environment variable: + + export OMP_NUM_THREADS=2 + + + - Intel/15.0.2-Serial/NoComplex + + + - GCC/4.8.4/CUDA/7.5.18-Cuda/Serial/NoComplex + Run tests with the following environment variables: + + export CUDA_LAUNCH_BLOCKING=1 + export CUDA_MANAGED_FORCE_DEVICE_ALLOC=1 + + + mkdir Build + cd Build + cp TRILINOS_PATH/sampleScripts/Sandia-SEMS/configure-all ./ + ** Set the path to Trilinos appropriately within the configure-all script ** + source $SEMS_MODULE_ROOT/utils/sems-modules-init.sh kokkos + source configure-all + make -k (-k means "keep going" to get past build errors; -j12 can also be specified to build with 12 parallel jobs, for example) + ctest + + 3.2. Compare the failed test output to the test output on the dashboard (testing.sandia.gov/cdash, select Trilinos); investigate and fix problems if new tests fail after the Kokkos snapshot + +// -------------------------------------------------------------------------------- // + +Step 4: + 4.1. Once all Trilinos tests pass, promote Kokkos develop branch to master on GitHub + + - DO NOT fast-forward the merge!!!! + + (From kokkos directory): + git checkout master + git fetch --all + # Ensure we are on the current origin/master + git reset --hard origin/master + git merge --no-ff origin/develop + + 4.2. 
Update the tag in kokkos/config/master_history.txt + Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate + Tag format: #.#.## + + # Prepend master_history.txt with + + # tag: #.#.## + # date: mm/dd/yyyy + # master: sha1 + # develop: sha1 + # ----------------------- + + git commit --amend -a + + git tag -a #.#.## + tag: #.#.## + date: mm/dd/yyyy + master: sha1 + develop: sha1 + + git push --follow-tags origin master + +// -------------------------------------------------------------------------------- // + +Step 5: + 5.1. Make sure Trilinos is up-to-date - chances are other changes have been committed since the integration testing process began. If a substantial change has occurred that may be affected by the snapshot, the testing procedure may need to be repeated. + + (From Trilinos directory): + git checkout develop + git fetch --all + git reset --hard origin/develop + git clean -df + + 5.2. Snapshot Kokkos master branch into Trilinos + + (From kokkos directory): + git fetch --all + git checkout tags/#.#.## + git clean -df + + python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages + + 5.3. Push the updated develop branch of Trilinos to GitHub - congratulations!!! + + (From Trilinos directory): + git push + +// -------------------------------------------------------------------------------- // diff --git a/lib/kokkos/config/kokkos_dev/config-core-all.sh b/lib/kokkos/config/kokkos_dev/config-core-all.sh new file mode 100755 index 0000000000..fa588c778f --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-all.sh @@ -0,0 +1,113 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Cuda, OpenMP, Threads, Qthread, hwloc +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu +# +# The 'nvcc-wrapper' module should load a script that matches +# kokkos/config/nvcc_wrapper +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" + +#----------------------------------------------------------------------------- +# Cuda using GNU, use the nvcc_wrapper to build CUDA source + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" 
+CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Hardware locality configuration: + +HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" + +#----------------------------------------------------------------------------- +# Pthread + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# Qthread + +QTHREAD_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREAD:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_INCLUDE_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/include" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREAD_LIBRARY_DIRS:FILEPATH=${QTHREAD_BASE_DIR}/lib" + +#----------------------------------------------------------------------------- +# C++11 + +# 
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh b/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh new file mode 100755 index 0000000000..c2e17bb944 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-cuda-omp-hwloc.sh @@ -0,0 +1,104 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. +# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Cuda, OpenMP, hwloc +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu +# +# The 'nvcc-wrapper' module should load a script that matches +# kokkos/config/nvcc_wrapper +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" + +#----------------------------------------------------------------------------- +# Cuda using GNU, use the nvcc_wrapper to build CUDA source + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Hardware locality configuration: + +HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" + 
+#----------------------------------------------------------------------------- +# Pthread explicitly OFF so tribits doesn't automatically turn it on + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-cuda.sh b/lib/kokkos/config/kokkos_dev/config-core-cuda.sh new file mode 100755 index 0000000000..39b72d5ce1 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-cuda.sh @@ -0,0 +1,88 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Cuda +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu +# +# The 'nvcc-wrapper' module should load a script that matches +# kokkos/config/nvcc_wrapper +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" + +#----------------------------------------------------------------------------- +# Cuda using GNU, use the nvcc_wrapper to build CUDA source + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + +# Pthread explicitly OFF, otherwise tribits will automatically turn it on + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# Configure 
for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh new file mode 100755 index 0000000000..b83a535416 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-cxx11-omp.sh @@ -0,0 +1,84 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# C++11, OpenMP +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} 
-D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Pthread explicitly OFF so tribits doesn't automatically activate + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh b/lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh new file mode 100755 index 0000000000..d2e06a4ebd --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-dbg-none.sh @@ -0,0 +1,78 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Kokkos Pthread explicitly OFF, TPL Pthread ON for gtest + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh new file mode 100755 index 0000000000..e2ab1f1c00 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-intel-cuda-omp.sh @@ -0,0 +1,89 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Intel, OpenMP, Cuda +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 cuda/7.0.4 intel/2015.0.090 nvcc-wrapper/intel +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" 
+CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Pthread explicitly OFF + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh new file mode 100755 index 0000000000..fd56d41161 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-intel-omp.sh @@ -0,0 +1,84 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Intel, OpenMP +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 intel/13.SP1.1.106 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" 
+CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Pthread explicitly OFF + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-omp.sh b/lib/kokkos/config/kokkos_dev/config-core-omp.sh new file mode 100755 index 0000000000..f91ecd5254 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-omp.sh @@ -0,0 +1,77 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# OpenMP +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D 
Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# OpenMP + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON" + +# Pthread explicitly OFF, otherwise tribits will automatically turn it on + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. +# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh b/lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh new file mode 100755 index 0000000000..19ab969023 --- /dev/null +++ b/lib/kokkos/config/kokkos_dev/config-core-threads-hwloc.sh @@ -0,0 +1,87 @@ +#!/bin/sh +# +# Copy this script, put it outside the Trilinos source directory, and +# build there. 
+# +#----------------------------------------------------------------------------- +# Building on 'kokkos-dev.sandia.gov' with enabled capabilities: +# +# Threads, hwloc +# +# module loaded on 'kokkos-dev.sandia.gov' for this build +# +# module load cmake/2.8.11.2 gcc/4.8.3 +# +#----------------------------------------------------------------------------- +# Source and installation directories: + +TRILINOS_SOURCE_DIR=${HOME}/Trilinos +TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F` + +CMAKE_CONFIGURE="" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}" + +#----------------------------------------------------------------------------- +# Debug/optimized + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE" + +#----------------------------------------------------------------------------- + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++" + +#----------------------------------------------------------------------------- +# Configure for Kokkos subpackages and tests: + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} 
-D Trilinos_ENABLE_KokkosExample:BOOL=ON" + +#----------------------------------------------------------------------------- +# Hardware locality configuration: + +HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3" + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib" + +#----------------------------------------------------------------------------- +# Pthread + +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON" +CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON" + +#----------------------------------------------------------------------------- +# C++11 + +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON" +# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON" + +#----------------------------------------------------------------------------- +# +# Remove CMake output files to force reconfigure from scratch. 
+# + +rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile* + +# + +echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR} + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt new file mode 100644 index 0000000000..f2eb674578 --- /dev/null +++ b/lib/kokkos/config/master_history.txt @@ -0,0 +1,3 @@ +tag: 2.01.00 date: 07:21:2016 master: xxxxxxxx develop: fa6dfcc4 +tag: 2.01.06 date: 09:02:2016 master: 9afaa87f develop: 555f1a3a + diff --git a/lib/kokkos/config/nvcc_wrapper b/lib/kokkos/config/nvcc_wrapper new file mode 100755 index 0000000000..6093cb61bd --- /dev/null +++ b/lib/kokkos/config/nvcc_wrapper @@ -0,0 +1,280 @@ +#!/bin/bash +# +# This shell script (nvcc_wrapper) wraps both the host compiler and +# NVCC, if you are building legacy C or C++ code with CUDA enabled. +# The script remedies some differences between the interface of NVCC +# and that of the host compiler, in particular for linking. +# It also means that a legacy code doesn't need separate .cu files; +# it can just use .cpp files. +# +# Default settings: change those according to your machine. For +# example, you may have two different wrappers with either icpc +# or g++ as their back-end compiler. The defaults can be overwritten +# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc). + +default_arch="sm_35" +#default_arch="sm_50" + +# +# The default C++ compiler. 
+# +host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"} +#host_compiler="icpc" +#host_compiler="/usr/local/gcc/4.8.3/bin/g++" +#host_compiler="/usr/local/gcc/4.9.1/bin/g++" + +# +# Internal variables +# + +# C++ files +cpp_files="" + +# Host compiler arguments +xcompiler_args="" + +# Cuda (NVCC) only arguments +cuda_args="" + +# Arguments for both NVCC and Host compiler +shared_args="" + +# Linker arguments +xlinker_args="" + +# Object files passable to NVCC +object_files="" + +# Link objects for the host linker only +object_files_xlinker="" + +# Shared libraries with version numbers are not handled correctly by NVCC +shared_versioned_libraries_host="" +shared_versioned_libraries="" + +# Does the User set the architecture +arch_set=0 + +# Does the user overwrite the host compiler +ccbin_set=0 + +#Error code of compilation +error_code=0 + +# Do a dry run without actually compiling +dry_run=0 + +# Skip NVCC compilation and use host compiler directly +host_only=0 + +# Enable workaround for CUDA 6.5 for pragma ident +replace_pragma_ident=0 + +# Mark first host compiler argument +first_xcompiler_arg=1 + +temp_dir=${TMPDIR:-/tmp} + +# Check if we have an optimization argument already +optimization_applied=0 + +#echo "Arguments: $# $@" + +while [ $# -gt 0 ] +do + case $1 in + #show the executed command + --show|--nvcc-wrapper-show) + dry_run=1 + ;; + #run host compilation only + --host-only) + host_only=1 + ;; + #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros + --replace-pragma-ident) + replace_pragma_ident=1 + ;; + #handle source files to be compiled as cuda files + *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu) + cpp_files="$cpp_files $1" + ;; + # Ensure we only have one optimization flag because NVCC doesn't allow multiple + -O*) + if [ $optimization_applied -eq 1 ]; then + echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used
because nvcc can only accept a single optimization setting." + else + shared_args="$shared_args $1" + optimization_applied=1 + fi + ;; + #Handle shared args (valid for both nvcc and the host compiler) + -D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared) + shared_args="$shared_args $1" + ;; + #Handle shared args that have an argument + -o|-MT) + shared_args="$shared_args $1 $2" + shift + ;; + #Handle known nvcc args + -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*) + cuda_args="$cuda_args $1" + ;; + #Handle known nvcc args that have an argument + -rdc|-maxrregcount|--default-stream) + cuda_args="$cuda_args $1 $2" + shift + ;; + #Handle c++11 setting + --std=c++11|-std=c++11) + shared_args="$shared_args $1" + ;; + #strip off -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98 + -std=c++98|--std=c++98) + ;; + #strip off pedantic because it produces endless warnings about #LINE added by the preprocessor + -pedantic|-Wpedantic|-ansi) + ;; + #strip -Xcompiler because we add it + -Xcompiler) + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="$2" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$2" + fi + shift + ;; + #strip off "-x cu" because we add that + -x) + if [[ $2 != "cu" ]]; then + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args="-x,$2" + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,-x,$2" + fi + fi + shift + ;; + #Handle -ccbin (if it's not set we can set it to a default value) + -ccbin) + cuda_args="$cuda_args $1 $2" + ccbin_set=1 + host_compiler=$2 + shift + ;; + #Handle -arch argument (if it's not set, use a default) + -arch*) + cuda_args="$cuda_args $1" + arch_set=1 + ;; + #Handle -Xcudafe argument + -Xcudafe) + cuda_args="$cuda_args -Xcudafe $2" + shift + ;; + #Handle args that should be sent to the linker + -Wl*) + xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}" +
host_linker_args="$host_linker_args ${1:4:${#1}}" + ;; + #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking + *.a|*.so|*.o|*.obj) + object_files="$object_files $1" + object_files_xlinker="$object_files_xlinker -Xlinker $1" + ;; + #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking + *.dylib) + object_files="$object_files -Xlinker $1" + object_files_xlinker="$object_files_xlinker -Xlinker $1" + ;; + #Handle shared libraries with *.so.* names which nvcc can't do. + *.so.*) + shared_versioned_libraries_host="$shared_versioned_libraries_host $1" + shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1" + ;; + #All other args are sent to the host compiler + *) + if [ $first_xcompiler_arg -eq 1 ]; then + xcompiler_args=$1 + first_xcompiler_arg=0 + else + xcompiler_args="$xcompiler_args,$1" + fi + ;; + esac + + shift +done + +#Add default host compiler if necessary +if [ $ccbin_set -ne 1 ]; then + cuda_args="$cuda_args -ccbin $host_compiler" +fi + +#Add architecture command +if [ $arch_set -ne 1 ]; then + cuda_args="$cuda_args -arch=$default_arch" +fi + +#Compose compilation command +nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries" +if [ $first_xcompiler_arg -eq 0 ]; then + nvcc_command="$nvcc_command -Xcompiler $xcompiler_args" +fi + +#Compose host only command +host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host" + +#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING' +if [ $replace_pragma_ident -eq 1 ]; then + cpp_files2="" + for file in $cpp_files + do + var=`grep pragma ${file} | grep ident | grep "#"` + if [ "${#var}" -gt 0 ] + then + sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file + cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file" 
+ else + cpp_files2="$cpp_files2 $file" + fi + done + cpp_files=$cpp_files2 + #echo $cpp_files +fi + +if [ "$cpp_files" ]; then + nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files" +else + nvcc_command="$nvcc_command $object_files" +fi + +if [ "$cpp_files" ]; then + host_command="$host_command $object_files $cpp_files" +else + host_command="$host_command $object_files" +fi + +#Print command for dryrun +if [ $dry_run -eq 1 ]; then + if [ $host_only -eq 1 ]; then + echo $host_command + else + echo $nvcc_command + fi + exit 0 +fi + +#Run compilation command +if [ $host_only -eq 1 ]; then + $host_command +else + $nvcc_command +fi +error_code=$? + +#Report error code +exit $error_code diff --git a/lib/kokkos/config/snapshot.py b/lib/kokkos/config/snapshot.py new file mode 100755 index 0000000000..d816cd0c9c --- /dev/null +++ b/lib/kokkos/config/snapshot.py @@ -0,0 +1,279 @@ +#! /usr/bin/env python + +""" +Snapshot a project into another project and perform the necessary repo actions +to provide a commit message that can be used to trace back to the exact point +in the source repository. +""" + +#todo: +# Support svn +# Allow renaming of the source dir in the destination path +# Check if a new snapshot is necessary? +# + +import sys + +#check the version number so that there is a good error message when argparse is not available. +#This checks for exactly 2.7 which is bad, but it is a python 2 script and argparse was introduced +#in 2.7 which is also the last version of python 2. If this script is updated for python 3 this +#will need to change, but for now it is not safe to allow 3.x to run this. +if sys.version_info[:2] != (2, 7): + print "Error snapshot requires python 2.7 detected version is %d.%d." 
% (sys.version_info[0], sys.version_info[1]) + sys.exit(1) + +import subprocess, argparse, re, doctest, os, datetime, traceback + +def parse_cmdline(description): + parser = argparse.ArgumentParser(usage="snapshot.py [options] source destination", description=description) + + parser.add_argument("-n", "--no-comit", action="store_false", dest="create_commit", default=True, + help="Do not perform a commit or create a commit message.") + parser.add_argument("-v", "--verbose", action="store_true", dest="verbose_mode", default=False, + help="Enable verbose mode.") + parser.add_argument("-d", "--debug", action="store_true", dest="debug_mode", default=False, + help="Enable debugging output.") + parser.add_argument("--no-validate-repo", action="store_true", dest="no_validate_repo", default=False, + help="Reduce the validation that the source and destination repos are clean to a warning.") + parser.add_argument("--source-repo", choices=["git","none"], default="", + help="Type of repository of the source, use none to skip all repository operations.") + parser.add_argument("--dest-repo", choices=["git","none"], default="", + help="Type of repository of the destination, use none to skip all repository operations.") + + parser.add_argument("source", help="Source project to snapshot from.") + parser.add_argument("destination", help="Destination to snapshot too.") + + options = parser.parse_args() + options = validate_options(options) + return options +#end parseCmdline + +def validate_options(options): + apparent_source_repo_type="none" + apparent_dest_repo_type="none" + + #prevent user from accidentally giving us a path that rsync will treat differently than expected. 
+ options.source = options.source.rstrip(os.sep) + options.destination = options.destination.rstrip(os.sep) + + options.source = os.path.abspath(options.source) + options.destination = os.path.abspath(options.destination) + + if os.path.exists(options.source): + apparent_source_repo_type, source_root = deterimine_repo_type(options.source) + else: + raise RuntimeError("Could not find source directory of %s." % options.source) + options.source_root = source_root + + if not os.path.exists(options.destination): + print "Could not find destination directory of %s so it will be created." % options.destination + os.makedirs(options.destination) + + apparent_dest_repo_type, dest_root = deterimine_repo_type(options.destination) + options.dest_root = dest_root + + #error on svn repo types for now + if apparent_source_repo_type == "svn" or apparent_dest_repo_type == "svn": + raise RuntimeError("SVN repositories are not supported at this time.") + + if options.source_repo == "": + #source repo type is not specified to just using the apparent type. + options.source_repo = apparent_source_repo_type + else: + if options.source_repo != "none" and options.source_repo != apparent_source_repo_type: + raise RuntimeError("Specified source repository type of %s conflicts with determined type of %s" % \ + (options.source_repo, apparent_source_repo_type)) + + if options.dest_repo == "": + #destination repo type is not specified to just using the apparent type. + options.dest_repo = apparent_dest_repo_type + else: + if options.dest_repo != "none" and options.dest_repo != apparent_dest_repo_type: + raise RuntimeError("Specified destination repository type of %s conflicts with determined type of %s" % \ + (options.dest_repo, apparent_dest_repo_type)) + + return options +#end validate_options + +def run_cmd(cmd, options, working_dir="."): + cmd_str = " ".join(cmd) + if options.verbose_mode: + print "Running command '%s' in dir %s." 
% (cmd_str, working_dir) + + proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, cwd=working_dir) + proc_stdout, proc_stderr = proc.communicate() + ret_val = proc.wait() + + if options.debug_mode: + print "==== %s stdout start ====" % cmd_str + print proc_stdout + print "==== %s stdout end ====" % cmd_str + print "==== %s stderr ====" % cmd_str + print proc_stderr + print "==== %s stderr ====" % cmd_str + + if ret_val != 0: + raise RuntimeError("Command '%s' failed with error code %d. Error message:%s%s%sstdout:%s" % \ + (cmd_str, ret_val, os.linesep, proc_stderr, os.linesep, proc_stdout)) + + return proc_stdout, proc_stderr +#end run_cmd + +def deterimine_repo_type(location): + apparent_repo_type = "none" + + while location != "": + if os.path.exists(os.path.join(location, ".git")): + apparent_repo_type = "git" + break + elif os.path.exists(os.path.join(location, ".svn")): + apparent_repo_type = "svn" + break + else: + location = location[:location.rfind(os.sep)] + + return apparent_repo_type, location + +#end deterimine_repo_type + +def rsync(source, dest, options): + rsync_cmd = ["rsync", "-ar", "--delete"] + if options.debug_mode: + rsync_cmd.append("-v") + + if options.source_repo == "git": + rsync_cmd.append("--exclude=.git") + + rsync_cmd.append(options.source) + rsync_cmd.append(options.destination) + run_cmd(rsync_cmd, options) +#end rsync + +def create_commit_message(commit_id, commit_log, project_name, project_location): + eol = os.linesep + message = "Snapshot of %s from commit %s" % (project_name, commit_id) + message += eol * 2 + message += "From repository at %s" % project_location + message += eol * 2 + message += "At commit:" + eol + message += commit_log + return message +#end create_commit_message + +def find_git_commit_information(options): + r""" + >>> class fake_options: + ... source="." + ... verbose_mode=False + ... 
debug_mode=False + >>> myoptions = fake_options() + >>> find_git_commit_information(myoptions)[2:] + ('sems', 'software.sandia.gov:/git/sems') + """ + git_log_cmd = ["git", "log", "-1"] + + output, error = run_cmd(git_log_cmd, options, options.source) + + commit_match = re.match("commit ([0-9a-fA-F]+)", output) + commit_id = commit_match.group(1) + commit_log = output + + git_remote_cmd = ["git", "remote", "-v"] + output, error = run_cmd(git_remote_cmd, options, options.source) + + remote_match = re.search("origin\s([^ ]*/([^ ]+))", output, re.MULTILINE) + if not remote_match: + raise RuntimeError("Could not find origin of repo at %s. Consider using none for source repo type." % (options.source)) + + source_location = remote_match.group(1) + source_name = remote_match.group(2).strip() + + if source_name[-1] == "/": + source_name = source_name[:-1] + + return commit_id, commit_log, source_name, source_location + +#end find_git_commit_information + +def do_git_commit(message, options): + if options.verbose_mode: + print "Commiting to destination repository." + + git_add_cmd = ["git", "add", "-A"] + run_cmd(git_add_cmd, options, options.destination) + + git_commit_cmd = ["git", "commit", "-m%s" % message] + run_cmd(git_commit_cmd, options, options.destination) + + git_log_cmd = ["git", "log", "--format=%h", "-1"] + commit_sha1, error = run_cmd(git_log_cmd, options, options.destination) + + print "Commit %s was made to %s." % (commit_sha1.strip(), options.dest_root) +#end do_git_commit + +def verify_git_repo_clean(location, options): + git_status_cmd = ["git", "status", "--porcelain"] + output, error = run_cmd(git_status_cmd, options, location) + + if output != "": + if options.no_validate_repo == False: + raise RuntimeError("%s is not clean.%sPlease commit or stash all changes before running snapshot." + % (location, os.linesep)) + else: + print "WARNING: %s is not clean. Proceeding anyway." 
% location + print "WARNING: This could lead to differences in the source and destination." + print "WARNING: It could also lead to extra files being included in the snapshot commit." + +#end verify_git_repo_clean + +def main(options): + if options.verbose_mode: + print "Snapshotting %s to %s." % (options.source, options.destination) + + if options.source_repo == "git": + verify_git_repo_clean(options.source, options) + commit_id, commit_log, repo_name, repo_location = find_git_commit_information(options) + elif options.source_repo == "none": + commit_id = "N/A" + commit_log = "Unknown commit from %s snapshotted at: %s" % (options.source, datetime.datetime.now()) + repo_name = options.source + repo_location = options.source + + commit_message = create_commit_message(commit_id, commit_log, repo_name, repo_location) + os.linesep*2 + + if options.dest_repo == "git": + verify_git_repo_clean(options.destination, options) + + rsync(options.source, options.destination, options) + + if options.dest_repo == "git": + do_git_commit(commit_message, options) + elif options.dest_repo == "none": + file_name = "snapshot_message.txt" + message_file = open(file_name, "w") + message_file.write(commit_message) + message_file.close() + cwd = os.getcwd() + print "No commit done by request. Please use file at:" + print "%s%sif you wish to commit this to a repo later." 
% (cwd+"/"+file_name, os.linesep) + + + + +#end main + +if (__name__ == "__main__"): + if ("--test" in sys.argv): + doctest.testmod() + sys.exit(0) + + try: + options = parse_cmdline(__doc__) + main(options) + except RuntimeError, e: + print "Error occured:", e + if "--debug" in sys.argv: + traceback.print_exc() + sys.exit(1) + else: + sys.exit(0) diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia new file mode 100755 index 0000000000..aac036a8f3 --- /dev/null +++ b/lib/kokkos/config/test_all_sandia @@ -0,0 +1,539 @@ +#!/bin/bash -e + +# +# Global config +# + +set -o pipefail + +# Determine current machine + +MACHINE="" +HOSTNAME=$(hostname) +if [[ "$HOSTNAME" =~ (white|ride).* ]]; then + MACHINE=white +elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then + MACHINE=bowman +elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name + MACHINE=shepard +elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then + MACHINE=sems +else + echo "Unrecognized machine" >&2 + exit 1 +fi + +GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial" +IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" +INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial" +CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial" +CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial" + +GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized" +IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized" +CUDA_WARNING_FLAGS="" + +# Default. 
Machine specific can override +DEBUG=False +ARGS="" +CUSTOM_BUILD_LIST="" +DRYRUN=False +BUILD_ONLY=False +declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3 +TEST_SCRIPT=False +SKIP_HWLOC=False + +ARCH_FLAG="" + +# +# Machine specific config +# + +if [ "$MACHINE" = "sems" ]; then + source /projects/modulefiles/utils/sems-modules-init.sh + source /projects/modulefiles/utils/kokkos-modules-init.sh + # Module lists are comma-separated templates; <COMPILER_NAME> and <COMPILER_VERSION> stand for the two '/'-separated parts of the compiler id + BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>/base,hwloc/1.10.1/<COMPILER_NAME>/<COMPILER_VERSION>/base" + CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.7.2/base" + + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS" + "cuda/6.5.14 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" + ) + +elif [ "$MACHINE" = "white" ]; then + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 + + BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>" + IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>" + CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.9.2" + + # Don't do pthread on white + GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial" + + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("gcc/4.9.2 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + 
"gcc/5.3.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS" + "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS" + ) + + ARCH_FLAG="--arch=Power8" + NUM_JOBS_TO_RUN_IN_PARALLEL=8 + +elif [ "$MACHINE" = "bowman" ]; then + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 + + BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>" + + OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" + + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + ) + + ARCH_FLAG="--arch=KNL" + NUM_JOBS_TO_RUN_IN_PARALLEL=8 + +elif [ "$MACHINE" = "shepard" ]; then + source /etc/profile.d/modules.sh + SKIP_HWLOC=True + export SLURM_TASKS_PER_NODE=32 + + BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>" + + OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial" + + # Format: (compiler module-list build-list exe-name warning-flag) + COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + "intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS" + ) + + ARCH_FLAG="--arch=HSW" + NUM_JOBS_TO_RUN_IN_PARALLEL=8 + +else + echo "Unhandled machine $MACHINE" >&2 + exit 1 +fi + +export OMP_NUM_THREADS=4 + +declare -i NUM_RESULTS_TO_KEEP=7 + +RESULT_ROOT_PREFIX=TestAll + +SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. 
&& pwd ) + +# +# Handle arguments +# + +while [[ $# > 0 ]] +do +key="$1" +case $key in +--kokkos-path*) +KOKKOS_PATH="${key#*=}" +;; +--build-list*) +CUSTOM_BUILD_LIST="${key#*=}" +;; +--debug*) +DEBUG=True +;; +--build-only*) +BUILD_ONLY=True +;; +--test-script*) +TEST_SCRIPT=True +;; +--skip-hwloc*) +SKIP_HWLOC=True +;; +--num*) +NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}" +;; +--dry-run*) +DRYRUN=True +;; +--help) +echo "test_all_sandia :" +echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory" +echo " Defaults to root repo containing this script" +echo "--debug: Run tests in debug. Defaults to False" +echo "--test-script: Test this script, not Kokkos" +echo "--skip-hwloc: Do not do hwloc tests" +echo "--num=N: Number of jobs to run in parallel " +echo "--dry-run: Just print what would be executed" +echo "--build-only: Just do builds, don't run anything" +echo "--build-list=BUILD,BUILD,BUILD..." +echo " Provide a comma-separated list of builds instead of running all builds" +echo " Valid items:" +echo " OpenMP, Pthread, Serial, OpenMP_Serial, Pthread_Serial" +echo " Cuda_OpenMP, Cuda_Pthread, Cuda_Serial" +echo "" + +echo "ARGS: list of expressions matching compilers to test" +echo " supported compilers sems" +for COMPILER_DATA in "${COMPILERS[@]}"; do + ARR=($COMPILER_DATA) + COMPILER=${ARR[0]} + echo " $COMPILER" +done +echo "" + +echo "Examples:" +echo " Run all tests" +echo " % test_all_sandia" +echo "" +echo " Run all gcc tests" +echo " % test_all_sandia gcc" +echo "" +echo " Run all gcc/4.7.2 and all intel tests" +echo " % test_all_sandia gcc/4.7.2 intel" +echo "" +echo " Run all tests in debug" +echo " % test_all_sandia --debug" +echo "" +echo " Run gcc/4.7.2 and only do OpenMP and OpenMP_Serial builds" +echo " % test_all_sandia gcc/4.7.2 --build-list=OpenMP,OpenMP_Serial" +echo "" +echo "If you want to kill the tests, do:" +echo " hit ctrl-z" +echo " % kill -9 %1" +echo +exit 0 +;; +*) +# args, just append +ARGS="$ARGS $1" +;; +esac +shift 
+done + +# set kokkos path +if [ -z "$KOKKOS_PATH" ]; then + KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT +else + # Ensure KOKKOS_PATH is abs path + KOKKOS_PATH=$( cd "$KOKKOS_PATH" && pwd ) +fi + +# set build type +if [ "$DEBUG" = "True" ]; then + BUILD_TYPE=debug +else + BUILD_TYPE=release +fi + +# If no args provided, do all compilers +if [ -z "$ARGS" ]; then + ARGS='?' +fi + +# Process args to figure out which compilers to test +COMPILERS_TO_TEST="" +for ARG in $ARGS; do + for COMPILER_DATA in "${COMPILERS[@]}"; do + ARR=($COMPILER_DATA) + COMPILER=${ARR[0]} + if [[ "$COMPILER" = $ARG* ]]; then + if [[ "$COMPILERS_TO_TEST" != *${COMPILER}* ]]; then + COMPILERS_TO_TEST="$COMPILERS_TO_TEST $COMPILER" + else + echo "Tried to add $COMPILER twice" + fi + fi + done +done + +# +# Functions +# + +# get_compiler_name +get_compiler_name() { + echo $1 | cut -d/ -f1 +} + +# get_compiler_version +get_compiler_version() { + echo $1 | cut -d/ -f2 +} + +# Do not call directly; prints field $2 of the COMPILERS entry whose first field equals $1, with commas turned into spaces and the <COMPILER_NAME>/<COMPILER_VERSION> placeholders expanded +get_compiler_data() { + local compiler=$1 + local item=$2 + local compiler_name=$(get_compiler_name $compiler) + local compiler_vers=$(get_compiler_version $compiler) + + local compiler_data + for compiler_data in "${COMPILERS[@]}" ; do + local arr=($compiler_data) + if [ "$compiler" = "${arr[0]}" ]; then + echo "${arr[$item]}" | tr , ' ' | sed -e "s/<COMPILER_NAME>/$compiler_name/g" -e "s/<COMPILER_VERSION>/$compiler_vers/g" + return 0 + fi + done + + # Not found + echo "Unreconized compiler $compiler" >&2 + exit 1 +} + +# +# For all getters, usage: +# + +get_compiler_modules() { + get_compiler_data $1 1 +} + +get_compiler_build_list() { + get_compiler_data $1 2 +} + +get_compiler_exe_name() { + get_compiler_data $1 3 +} + +get_compiler_warning_flags() { + get_compiler_data $1 4 +} + +run_cmd() { + echo "RUNNING: $*" + if [ "$DRYRUN" != "True" ]; then + eval "$* 2>&1" + fi +} + +# report_and_log_test_results +report_and_log_test_result() { + # Use sane var names + local success=$1; local desc=$2; local comment=$3; + + if [ "$success" = "0" ]; then + 
echo " PASSED $desc" + echo $comment > $PASSED_DIR/$desc + else + # For failures, comment should be the name of the phase that failed + echo " FAILED $desc" >&2 + echo $comment > $FAILED_DIR/$desc + cat ${desc}.${comment}.log + fi +} + +setup_env() { + local compiler=$1 + local compiler_modules=$(get_compiler_modules $compiler) + + module purge + + local mod + for mod in $compiler_modules; do + echo "Loading module $mod" + module load $mod 2>&1 + # It is ridiculously hard to check for the success of a loaded + # module. Module does not return error codes and piping to grep + # causes module to run in a subshell. + module list 2>&1 | grep "$mod" >& /dev/null || return 1 + done + + return 0 +} + +# single_build_and_test +single_build_and_test() { + # Use sane var names + local compiler=$1; local build=$2; local build_type=$3; + + # set up env + mkdir -p $ROOT_DIR/$compiler/"${build}-$build_type" + cd $ROOT_DIR/$compiler/"${build}-$build_type" + local desc=$(echo "${compiler}-${build}-${build_type}" | sed 's:/:-:g') + setup_env $compiler >& ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + + # Set up flags + local compiler_warning_flags=$(get_compiler_warning_flags $compiler) + local compiler_exe=$(get_compiler_exe_name $compiler) + + if [[ "$build_type" = hwloc* ]]; then + local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info))) + fi + + if [[ "$build_type" = *debug* ]]; then + local extra_args="$extra_args --debug" + local cxxflags="-g $compiler_warning_flags" + else + local cxxflags="-O3 $compiler_warning_flags" + fi + + if [[ "$compiler" == cuda* ]]; then + cxxflags="--keep --keep-dir=$(pwd) $cxxflags" + export TMPDIR=$(pwd) + fi + + # cxxflags="-DKOKKOS_USING_EXP_VIEW=1 $cxxflags" + + echo " Starting job $desc" + + local comment="no_comment" + + if [ "$TEST_SCRIPT" = "True" ]; then + local rand=$[ 1 + $[ RANDOM % 10 ]] + sleep $rand + if [ $rand -gt 5 ]; then + run_cmd ls fake_problem >& 
${desc}.configure.log || { report_and_log_test_result 1 $desc configure && return 0; } + fi + else + run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; } + local -i build_start_time=$(date +%s) + run_cmd make build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; } + local -i build_end_time=$(date +%s) + comment="build_time=$(($build_end_time-$build_start_time))" + if [[ "$BUILD_ONLY" == False ]]; then + run_cmd make test >& ${desc}.test.log || { report_and_log_test_result 1 ${desc} test && return 0; } + local -i run_end_time=$(date +%s) + comment="$comment run_time=$(($run_end_time-$build_end_time))" + fi + fi + + report_and_log_test_result 0 $desc "$comment" + + return 0 +} + +# wait_for_jobs +wait_for_jobs() { + local -i max_jobs=$1 + local -i num_active_jobs=$(jobs | wc -l) + while [ $num_active_jobs -ge $max_jobs ] + do + sleep 1 + num_active_jobs=$(jobs | wc -l) + jobs >& /dev/null + done +} + +# run_in_background +run_in_background() { + local compiler=$1 + + local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL + if [[ "$BUILD_ONLY" == True ]]; then + num_jobs=8 + else + if [[ "$compiler" == cuda* ]]; then + num_jobs=1 + fi + fi + wait_for_jobs $num_jobs + + single_build_and_test $* & +} + +# build_and_test_all +build_and_test_all() { + # Get compiler data + local compiler=$1 + if [ -z "$CUSTOM_BUILD_LIST" ]; then + local compiler_build_list=$(get_compiler_build_list $compiler) + else + local compiler_build_list=$(echo "$CUSTOM_BUILD_LIST" | tr , ' ') + fi + + # do builds + local build + for build in $compiler_build_list + do + run_in_background $compiler $build $BUILD_TYPE + + # If not cuda, do a hwloc test too + if [[ "$compiler" != cuda* && "$SKIP_HWLOC" == False ]]; then + run_in_background $compiler $build "hwloc-$BUILD_TYPE" + fi + 
done + + return 0 +} + +get_test_root_dir() { + local existing_results=$(find . -maxdepth 1 -name "$RESULT_ROOT_PREFIX*" | sort) + local -i num_existing_results=$(echo $existing_results | tr ' ' '\n' | wc -l) + local -i num_to_delete=${num_existing_results}-${NUM_RESULTS_TO_KEEP} + + if [ $num_to_delete -gt 0 ]; then + /bin/rm -rf $(echo $existing_results | tr ' ' '\n' | head -n $num_to_delete) + fi + + echo $(pwd)/${RESULT_ROOT_PREFIX}_$(date +"%Y-%m-%d_%H.%M.%S") +} + +wait_summarize_and_exit() { + wait_for_jobs 1 + + echo "#######################################################" + echo "PASSED TESTS" + echo "#######################################################" + + local passed_test + for passed_test in $(\ls -1 $PASSED_DIR | sort) + do + echo $passed_test $(cat $PASSED_DIR/$passed_test) + done + + echo "#######################################################" + echo "FAILED TESTS" + echo "#######################################################" + + local failed_test + local -i rv=0 + for failed_test in $(\ls -1 $FAILED_DIR | sort) + do + echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)" + rv=$rv+1 + done + + exit $rv +} + +# +# Main +# + +ROOT_DIR=$(get_test_root_dir) +mkdir -p $ROOT_DIR +cd $ROOT_DIR + +PASSED_DIR=$ROOT_DIR/results/passed +FAILED_DIR=$ROOT_DIR/results/failed +mkdir -p $PASSED_DIR +mkdir -p $FAILED_DIR + +echo "Going to test compilers: " $COMPILERS_TO_TEST +for COMPILER in $COMPILERS_TO_TEST; do + echo "Testing compiler $COMPILER" + build_and_test_all $COMPILER +done + +wait_summarize_and_exit diff --git a/lib/kokkos/config/testing_scripts/README b/lib/kokkos/config/testing_scripts/README new file mode 100644 index 0000000000..455afffd84 --- /dev/null +++ b/lib/kokkos/config/testing_scripts/README @@ -0,0 +1,5 @@ +jenkins_test_driver is designed to be run through Jenkins as a +multiconfiguration job. It relies on a number of environment variables that will +only be set when run in that context. 
It is possible to override these if you +know the Jenkins job setup. It is not recommended that a non-expert try to run +this script directly. diff --git a/lib/kokkos/config/testing_scripts/jenkins_test_driver b/lib/kokkos/config/testing_scripts/jenkins_test_driver new file mode 100755 index 0000000000..9cba7fa518 --- /dev/null +++ b/lib/kokkos/config/testing_scripts/jenkins_test_driver @@ -0,0 +1,83 @@ +#!/bin/bash -x + +echo "Building for BUILD_TYPE = ${BUILD_TYPE}" +echo "Building with HOST_COMPILER = ${HOST_COMPILER}" +echo "Building in ${WORKSPACE}" + +module use /home/projects/modulefiles + +BUILD_TYPE=`echo $BUILD_TYPE | tr "~" " "` +build_options="" +for item in ${BUILD_TYPE}; do + build_options="$build_options --with-$item" +done + +kokkos_path=${WORKSPACE}/kokkos +gtest_path=${WORKSPACE}/kokkos/tpls/gtest + +echo ${WORKSPACE} +pwd + +#extract information from the provided parameters. +host_compiler_brand=`echo $HOST_COMPILER | grep -o "^[a-zA-Z]*"` +cuda_compiler=`echo $BUILD_TYPE | grep -o "cuda_[^ ]*"` + +host_compiler_module=`echo $HOST_COMPILER | tr "_" "/"` +cuda_compiler_module=`echo $cuda_compiler | tr "_" "/"` +build_path=`echo $BUILD_TYPE | tr " " "_"` + +module load $host_compiler_module +module load $cuda_compiler_module + +case $host_compiler_brand in + gcc) + module load nvcc-wrapper/gnu + compiler=g++ + ;; + intel) + module load nvcc-wrapper/intel + compiler=icpc + ;; + *) + echo "Unrecognized compiler brand." + exit 1 + ;; +esac + +#if cuda is on we need to set the host compiler for the +#nvcc wrapper and make the wrapper the compiler. cuda_compiler is empty when BUILD_TYPE has no cuda_* entry, so it is quoted in the tests below. +if [ "$cuda_compiler" != "" ]; then + export NVCC_WRAPPER_DEFAULT_COMPILER=$compiler + compiler=$kokkos_path/config/nvcc_wrapper +fi + +if [ "$host_compiler_brand" == "intel" ] && [ "$cuda_compiler" != "" ]; then + echo "Intel compilers are not supported with cuda at this time." 
+ exit 0 +fi + +rm -rf test-$build_path +mkdir test-$build_path +cd test-$build_path + +/bin/bash $kokkos_path/generate_makefile.bash $build_options --kokkos-path="$kokkos_path" --with-gtest="$gtest_path" --compiler=$compiler 2>&1 |tee configure.out + +if [ ${PIPESTATUS[0]} != 0 ]; then + echo "Configure failed." + exit 1 +fi + +make build-test 2>&1 | tee build.log + +if [ ${PIPESTATUS[0]} != 0 ]; then + echo "Build failed." + exit 1 +fi + +make test 2>&1 | tee test.log + +grep "FAIL" test.log +if [ $? == 0 ]; then + echo "Tests failed." + exit 1 +fi diff --git a/lib/kokkos/config/testing_scripts/obj_size_opt_check b/lib/kokkos/config/testing_scripts/obj_size_opt_check new file mode 100755 index 0000000000..47c84d1a92 --- /dev/null +++ b/lib/kokkos/config/testing_scripts/obj_size_opt_check @@ -0,0 +1,287 @@ +#! /usr/bin/env python + +""" +Compute the size at which the current compiler will start to +significantly scale back optimization. + +The CPP file being modified will need the following tags. +// JGF_DUPLICATE_BEGIN - Put before start of function to duplicate +// JGF_DUPLICATE_END - Put after end of function to duplicate +// JGF_DUPE: function_name(args); - Put anywhere where it's legal to +put a function call but not in your timing section. 
+ +The program will need to output the string: +FOM: <number> +This will represent the program's performance +""" + +import argparse, sys, os, doctest, subprocess, re, time + +VERBOSE = False + +############################################################################### +def parse_command_line(args, description): +############################################################################### + parser = argparse.ArgumentParser( + usage="""\n%s [--verbose] +OR +%s --help +OR +%s --test + +\033[1mEXAMPLES:\033[0m + > %s foo.cpp 'make -j4' foo +""" % ((os.path.basename(args[0]), ) * 4), + +description=description, + +formatter_class=argparse.ArgumentDefaultsHelpFormatter +) + + parser.add_argument("cppfile", help="Name of file to modify.") + + parser.add_argument("buildcmd", help="Build command") + + parser.add_argument("execmd", help="Run command") + + parser.add_argument("-v", "--verbose", action="store_true", + help="Print extra information") + + parser.add_argument("-s", "--start", type=int, default=1, + help="Starting number of dupes") + + parser.add_argument("-e", "--end", type=int, default=1000, + help="Ending number of dupes") + + parser.add_argument("-n", "--repeat", type=int, default=10, + help="Number of times to repeat an individial execution. 
Best value will be taken.") + + parser.add_argument("-t", "--template", action="store_true", + help="Use templating instead of source copying to increase object size") + + parser.add_argument("-c", "--csv", action="store_true", + help="Print results as CSV") + + args = parser.parse_args(args[1:]) + + if (args.verbose): + global VERBOSE + VERBOSE = True + + return args.cppfile, args.buildcmd, args.execmd, args.start, args.end, args.repeat, args.template, args.csv + +############################################################################### +def verbose_print(msg, override=None): +############################################################################### + if ( (VERBOSE and not override is False) or override): + print msg + +############################################################################### +def error_print(msg): +############################################################################### + print >> sys.stderr, msg + +############################################################################### +def expect(condition, error_msg): +############################################################################### + """ + Similar to assert except doesn't generate an ugly stacktrace. Useful for + checking user error, not programming error. 
+ """ + if (not condition): + raise SystemExit("FAIL: %s" % error_msg) + +############################################################################### +def run_cmd(cmd, ok_to_fail=False, input_str=None, from_dir=None, verbose=None, + arg_stdout=subprocess.PIPE, arg_stderr=subprocess.PIPE): +############################################################################### + verbose_print("RUN: %s" % cmd, verbose) + + if (input_str is not None): + stdin = subprocess.PIPE + else: + stdin = None + + proc = subprocess.Popen(cmd, + shell=True, + stdout=arg_stdout, + stderr=arg_stderr, + stdin=stdin, + cwd=from_dir) + output, errput = proc.communicate(input_str) + output = output.strip() if output is not None else output + stat = proc.wait() + + if (ok_to_fail): + return stat, output, errput + else: + if (arg_stderr is not None): + errput = errput if errput is not None else open(arg_stderr.name, "r").read() + expect(stat == 0, "Command: '%s' failed with error '%s'" % (cmd, errput)) + else: + expect(stat == 0, "Command: '%s' failed. 
See terminal output" % cmd) + return output + +############################################################################### +def build_and_run(source, cppfile, buildcmd, execmd, repeat): +############################################################################### + open(cppfile, 'w').writelines(source) + + run_cmd(buildcmd) + + best = None + for i in xrange(repeat): + wait_for_quiet_machine() + output = run_cmd(execmd) + + current = None + fom_regex = re.compile(r'^FOM: ([0-9.]+)$') + for line in output.splitlines(): + m = fom_regex.match(line) + if (m is not None): + current = float(m.groups()[0]) + break + + expect(current is not None, "No lines in output matched FOM regex") + + if (best is None or best < current): + best = current + + return best + +############################################################################### +def wait_for_quiet_machine(): +############################################################################### + while(True): + time.sleep(2) + + # The first iteration of top gives garbage results + idle_pct_raw = run_cmd("top -bn2 | grep 'Cpu(s)' | tr ',' ' ' | tail -n 1 | awk '{print $5}'") + + idle_pct_re = re.compile(r'^([0-9.]+)%id$') + m = idle_pct_re.match(idle_pct_raw) + + expect(m is not None, "top not returning output in expected form") + + idle_pct = float(m.groups()[0]) + if (idle_pct < 95): + error_print("Machine is too busy, waiting for it to become free") + else: + break + +############################################################################### +def add_n_dupes(curr_lines, num_dupes, template): +############################################################################### + function_name = None + function_invocation = None + function_lines = [] + + function_re = re.compile(r'^.* (\w+) *[(]') + function_inv_re = re.compile(r'^.*JGF_DUPE: +(.+)$') + + # Get function lines + record = False + definition_insertion_point = None + invocation_insertion_point = None + for idx, line in enumerate(curr_lines): + if 
("JGF_DUPLICATE_BEGIN" in line): + record = True + m = function_re.match(curr_lines[idx+1]) + expect(m is not None, "Could not find function in line '%s'" % curr_lines[idx+1]) + function_name = m.groups()[0] + + elif ("JGF_DUPLICATE_END" in line): + record = False + definition_insertion_point = idx + 1 + + elif (record): + function_lines.append(line) + + elif ("JGF_DUPE" in line): + m = function_inv_re.match(line) + expect(m is not None, "Could not find function invocation example in line '%s'" % line) + function_invocation = m.groups()[0] + invocation_insertion_point = idx + 1 + + expect(function_name is not None, "Could not find name of dupe function") + expect(function_invocation is not None, "Could not find function invocation point") + + expect(definition_insertion_point < invocation_insertion_point, "fix me") + + dupe_func_defs = [] + dupe_invocations = ["int jgf_rand = std::rand();\n", "if (false) {}\n"] + + for i in xrange(num_dupes): + if (not template): + dupe_func = list(function_lines) + dupe_func[0] = dupe_func[0].replace(function_name, "%s%d" % (function_name, i)) + dupe_func_defs.extend(dupe_func) + + dupe_invocations.append("else if (jgf_rand == %d) " % i) + if (template): + dupe_call = function_invocation.replace(function_name, "%s<%d>" % (function_name, i)) + "\n" + else: + dupe_call = function_invocation.replace(function_name, "%s%d" % (function_name, i)) + "\n" + dupe_invocations.append(dupe_call) + + curr_lines[invocation_insertion_point:invocation_insertion_point] = dupe_invocations + curr_lines[definition_insertion_point:definition_insertion_point] = dupe_func_defs + +############################################################################### +def report(num_dupes, curr_lines, object_file, orig_fom, curr_fom, csv=False, is_first_report=False): +############################################################################### + fom_change = (curr_fom - orig_fom) / orig_fom + + if (csv): + if (is_first_report): + print "num_dupes, 
obj_byte_size, loc, fom, pct_diff" + + print "%s, %s, %s, %s, %s" % (num_dupes, os.path.getsize(object_file), len(curr_lines), curr_fom, fom_change*100) + else: + print "========================================================" + print "For number of dupes:", num_dupes + print "Object file size (bytes):", os.path.getsize(object_file) + print "Lines of code:", len(curr_lines) + print "Field of merit:", curr_fom + print "Change pct:", fom_change*100 + +############################################################################### +def obj_size_opt_check(cppfile, buildcmd, execmd, start, end, repeat, template, csv=False): +############################################################################### + orig_source_lines = open(cppfile, 'r').readlines() + + backup_file = "%s.orig" % cppfile + object_file = "%s.o" % os.path.splitext(cppfile)[0] + os.rename(cppfile, backup_file) + + orig_fom = build_and_run(orig_source_lines, cppfile, buildcmd, execmd, repeat) + report(0, orig_source_lines, object_file, orig_fom, orig_fom, csv=csv, is_first_report=True) + + i = start + while (i < end): + curr_lines = list(orig_source_lines) + add_n_dupes(curr_lines, i, template) + + curr_fom = build_and_run(curr_lines, cppfile, buildcmd, execmd, repeat) + + report(i, curr_lines, object_file, orig_fom, curr_fom, csv=csv) + + i *= 2 # make growth function configurable? 
+ + os.remove(cppfile) + os.rename(backup_file, cppfile) + +############################################################################### +def _main_func(description): +############################################################################### + if ("--test" in sys.argv): + test_results = doctest.testmod(verbose=True) + sys.exit(1 if test_results.failed > 0 else 0) + + cppfile, buildcmd, execmd, start, end, repeat, template, csv = parse_command_line(sys.argv, description) + + obj_size_opt_check(cppfile, buildcmd, execmd, start, end, repeat, template, csv) + +############################################################################### +if (__name__ == "__main__"): + _main_func(__doc__) diff --git a/lib/kokkos/containers/CMakeLists.txt b/lib/kokkos/containers/CMakeLists.txt new file mode 100644 index 0000000000..894935fa01 --- /dev/null +++ b/lib/kokkos/containers/CMakeLists.txt @@ -0,0 +1,10 @@ + + +TRIBITS_SUBPACKAGE(Containers) + +ADD_SUBDIRECTORY(src) + +TRIBITS_ADD_TEST_DIRECTORIES(unit_tests) +TRIBITS_ADD_TEST_DIRECTORIES(performance_tests) + +TRIBITS_SUBPACKAGE_POSTPROCESS() diff --git a/lib/kokkos/containers/cmake/Dependencies.cmake b/lib/kokkos/containers/cmake/Dependencies.cmake new file mode 100644 index 0000000000..1d71d8af34 --- /dev/null +++ b/lib/kokkos/containers/cmake/Dependencies.cmake @@ -0,0 +1,5 @@ +TRIBITS_PACKAGE_DEFINE_DEPENDENCIES( + LIB_REQUIRED_PACKAGES KokkosCore + LIB_OPTIONAL_TPLS Pthread CUDA HWLOC + TEST_OPTIONAL_TPLS CUSPARSE + ) diff --git a/lib/kokkos/containers/cmake/KokkosContainers_config.h.in b/lib/kokkos/containers/cmake/KokkosContainers_config.h.in new file mode 100644 index 0000000000..d91fdda1e3 --- /dev/null +++ b/lib/kokkos/containers/cmake/KokkosContainers_config.h.in @@ -0,0 +1,4 @@ +#ifndef KOKKOS_CONTAINERS_CONFIG_H +#define KOKKOS_CONTAINERS_CONFIG_H + +#endif diff --git a/lib/kokkos/containers/performance_tests/CMakeLists.txt b/lib/kokkos/containers/performance_tests/CMakeLists.txt new file mode 100644 
index 0000000000..726d403452 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/CMakeLists.txt @@ -0,0 +1,37 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src ) + +SET(SOURCES + TestMain.cpp + TestCuda.cpp + ) + +IF(Kokkos_ENABLE_Pthread) + LIST( APPEND SOURCES TestThreads.cpp) +ENDIF() + +IF(Kokkos_ENABLE_OpenMP) + LIST( APPEND SOURCES TestOpenMP.cpp) +ENDIF() + +# Per #374, we always want to build this test, but we only want to run +# it as a PERFORMANCE test. That's why we separate building the test +# from running the test. + +TRIBITS_ADD_EXECUTABLE( + PerfTestExec + SOURCES ${SOURCES} + COMM serial mpi + TESTONLYLIBS kokkos_gtest + ) + +TRIBITS_ADD_TEST( + PerformanceTest + NAME PerfTestExec + COMM serial mpi + NUM_MPI_PROCS 1 + CATEGORIES PERFORMANCE + FAIL_REGULAR_EXPRESSION " FAILED " + ) diff --git a/lib/kokkos/containers/performance_tests/Makefile b/lib/kokkos/containers/performance_tests/Makefile new file mode 100644 index 0000000000..e7abaf44ce --- /dev/null +++ b/lib/kokkos/containers/performance_tests/Makefile @@ -0,0 +1,81 @@ +KOKKOS_PATH = ../.. 
+ +GTEST_PATH = ../../TPL/gtest + +vpath %.cpp ${KOKKOS_PATH}/containers/performance_tests + +default: build_all + echo "End Build" + + +include $(KOKKOS_PATH)/Makefile.kokkos + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + CXX = $(NVCC_WRAPPER) + CXXFLAGS ?= -O3 + LINK = $(CXX) + LDFLAGS ?= -lpthread +else + CXX ?= g++ + CXXFLAGS ?= -O3 + LINK ?= $(CXX) + LDFLAGS ?= -lpthread +endif + +KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/containers/performance_tests + +TEST_TARGETS = +TARGETS = + +ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) + OBJ_CUDA = TestCuda.o TestMain.o gtest-all.o + TARGETS += KokkosContainers_PerformanceTest_Cuda + TEST_TARGETS += test-cuda +endif + +ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) + OBJ_THREADS = TestThreads.o TestMain.o gtest-all.o + TARGETS += KokkosContainers_PerformanceTest_Threads + TEST_TARGETS += test-threads +endif + +ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1) + OBJ_OPENMP = TestOpenMP.o TestMain.o gtest-all.o + TARGETS += KokkosContainers_PerformanceTest_OpenMP + TEST_TARGETS += test-openmp +endif + +KokkosContainers_PerformanceTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Cuda + +KokkosContainers_PerformanceTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_Threads + +KokkosContainers_PerformanceTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosContainers_PerformanceTest_OpenMP + +test-cuda: KokkosContainers_PerformanceTest_Cuda + ./KokkosContainers_PerformanceTest_Cuda + +test-threads: KokkosContainers_PerformanceTest_Threads + ./KokkosContainers_PerformanceTest_Threads + +test-openmp: KokkosContainers_PerformanceTest_OpenMP + ./KokkosContainers_PerformanceTest_OpenMP + + +build_all: 
$(TARGETS) + +test: $(TEST_TARGETS) + +clean: kokkos-clean + rm -f *.o $(TARGETS) + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< + +gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc + diff --git a/lib/kokkos/containers/performance_tests/TestCuda.cpp b/lib/kokkos/containers/performance_tests/TestCuda.cpp new file mode 100644 index 0000000000..8183adaa60 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestCuda.cpp @@ -0,0 +1,109 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include +#include +#include + +#include + +#include + +#if defined( KOKKOS_HAVE_CUDA ) + +#include + +#include + +#include + +#include + +namespace Performance { + +class cuda : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + Kokkos::HostSpace::execution_space::initialize(); + Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) ); + } + static void TearDownTestCase() + { + Kokkos::Cuda::finalize(); + Kokkos::HostSpace::execution_space::finalize(); + } +}; + +TEST_F( cuda, dynrankview_perf ) +{ + std::cout << "Cuda" << std::endl; + std::cout << " DynRankView vs View: Initialization Only " << std::endl; + test_dynrankview_op_perf( 4096 ); +} + +TEST_F( cuda, global_2_local) +{ + std::cout << "Cuda" << std::endl; + std::cout << "size, create, generate, fill, find" << std::endl; + for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + test_global_to_local_ids(i); +} + +TEST_F( cuda, unordered_map_performance_near) +{ + Perf::run_performance_tests("cuda-near"); +} + +TEST_F( cuda, unordered_map_performance_far) +{ + Perf::run_performance_tests("cuda-far"); +} + +} + +#endif /* #if defined( KOKKOS_HAVE_CUDA ) */ 
diff --git a/lib/kokkos/containers/performance_tests/TestDynRankView.hpp b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp new file mode 100644 index 0000000000..aab6e6988f --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestDynRankView.hpp @@ -0,0 +1,265 @@ + +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_DYNRANKVIEW_HPP +#define KOKKOS_TEST_DYNRANKVIEW_HPP + +#include +#include +#include + +#include + +// Compare performance of DynRankView to View, specific focus on the parenthesis operators + +namespace Performance { + +//View functor +template +struct InitViewFunctor { + typedef Kokkos::View inviewtype; + inviewtype _inview; + + InitViewFunctor( inviewtype &inview_ ) : _inview(inview_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _inview(i,j,k) = i/2 -j*j + k/3; + } + } + } + + struct SumComputationTest + { + typedef Kokkos::View inviewtype; + inviewtype _inview; + + typedef Kokkos::View outviewtype; + outviewtype _outview; + + KOKKOS_INLINE_FUNCTION + SumComputationTest(inviewtype &inview_ , outviewtype &outview_) : _inview(inview_), _outview(outview_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _outview(i) += _inview(i,j,k) ; + } + } + } + }; + +}; + +template +struct InitStrideViewFunctor { + typedef Kokkos::View inviewtype; + inviewtype _inview; + + InitStrideViewFunctor( 
inviewtype &inview_ ) : _inview(inview_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _inview(i,j,k) = i/2 -j*j + k/3; + } + } + } + +}; + +template +struct InitViewRank7Functor { + typedef Kokkos::View inviewtype; + inviewtype _inview; + + InitViewRank7Functor( inviewtype &inview_ ) : _inview(inview_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _inview(i,j,k,0,0,0,0) = i/2 -j*j + k/3; + } + } + } + +}; + +//DynRankView functor +template +struct InitDynRankViewFunctor { + typedef Kokkos::DynRankView inviewtype; + inviewtype _inview; + + InitDynRankViewFunctor( inviewtype &inview_ ) : _inview(inview_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _inview(i,j,k) = i/2 -j*j + k/3; + } + } + } + + struct SumComputationTest + { + typedef Kokkos::DynRankView inviewtype; + inviewtype _inview; + + typedef Kokkos::DynRankView outviewtype; + outviewtype _outview; + + KOKKOS_INLINE_FUNCTION + SumComputationTest(inviewtype &inview_ , outviewtype &outview_) : _inview(inview_), _outview(outview_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const int i) const { + for (unsigned j = 0; j < _inview.dimension(1); ++j) { + for (unsigned k = 0; k < _inview.dimension(2); ++k) { + _outview(i) += _inview(i,j,k) ; + } + } + } + }; + +}; + + +template +void test_dynrankview_op_perf( const int par_size ) +{ + + typedef DeviceType execution_space; + typedef typename execution_space::size_type size_type; + const size_type dim2 = 900; + const size_type dim3 = 300; + + double elapsed_time_view = 0; + double elapsed_time_compview = 0; + double elapsed_time_strideview = 0; + double 
elapsed_time_view_rank7 = 0; + double elapsed_time_drview = 0; + double elapsed_time_compdrview = 0; + Kokkos::Timer timer; + { + Kokkos::View testview("testview",par_size,dim2,dim3); + typedef InitViewFunctor FunctorType; + + timer.reset(); + Kokkos::RangePolicy policy(0,par_size); + Kokkos::parallel_for( policy , FunctorType(testview) ); + DeviceType::fence(); + elapsed_time_view = timer.seconds(); + std::cout << " View time (init only): " << elapsed_time_view << std::endl; + + + timer.reset(); + Kokkos::View sumview("sumview",par_size); + Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testview, sumview) ); + DeviceType::fence(); + elapsed_time_compview = timer.seconds(); + std::cout << " View sum computation time: " << elapsed_time_compview << std::endl; // print the sum timing just measured, not the init timing + + + Kokkos::View teststrideview = Kokkos::subview(testview, Kokkos::ALL, Kokkos::ALL,Kokkos::ALL); + typedef InitStrideViewFunctor FunctorStrideType; + + timer.reset(); + Kokkos::parallel_for( policy , FunctorStrideType(teststrideview) ); + DeviceType::fence(); + elapsed_time_strideview = timer.seconds(); + std::cout << " Strided View time (init only): " << elapsed_time_strideview << std::endl; + } + { + Kokkos::View testview("testview",par_size,dim2,dim3,1,1,1,1); + typedef InitViewRank7Functor FunctorType; + + timer.reset(); + Kokkos::RangePolicy policy(0,par_size); + Kokkos::parallel_for( policy , FunctorType(testview) ); + DeviceType::fence(); + elapsed_time_view_rank7 = timer.seconds(); + std::cout << " View Rank7 time (init only): " << elapsed_time_view_rank7 << std::endl; + } + { + Kokkos::DynRankView testdrview("testdrview",par_size,dim2,dim3); + typedef InitDynRankViewFunctor FunctorType; + + timer.reset(); + Kokkos::RangePolicy policy(0,par_size); + Kokkos::parallel_for( policy , FunctorType(testdrview) ); + DeviceType::fence(); + elapsed_time_drview = timer.seconds(); + std::cout << " DynRankView time (init only): " << elapsed_time_drview << std::endl; + + timer.reset(); +
Kokkos::DynRankView sumview("sumview",par_size); + Kokkos::parallel_for( policy , typename FunctorType::SumComputationTest(testdrview, sumview) ); + DeviceType::fence(); + elapsed_time_compdrview = timer.seconds(); + std::cout << " DynRankView sum computation time: " << elapsed_time_compdrview << std::endl; + + } + + std::cout << " Ratio of View to DynRankView time: " << elapsed_time_view / elapsed_time_drview << std::endl; //expect < 1 + std::cout << " Ratio of View to DynRankView sum computation time: " << elapsed_time_compview / elapsed_time_compdrview << std::endl; //expect < 1 + std::cout << " Ratio of View to View Rank7 time: " << elapsed_time_view / elapsed_time_view_rank7 << std::endl; //expect < 1 + std::cout << " Ratio of StrideView to DynRankView time: " << elapsed_time_strideview / elapsed_time_drview << std::endl; //expect < 1 + std::cout << " Ratio of DynRankView to View Rank7 time: " << elapsed_time_drview / elapsed_time_view_rank7 << std::endl; //expect ? + + timer.reset(); + +} //end test_dynrankview + + +} //end Performance +#endif diff --git a/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp new file mode 100644 index 0000000000..66f1fbf092 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestGlobal2LocalIds.hpp @@ -0,0 +1,231 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP +#define KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP + +#include +#include +#include +#include + +#include + +// This test will simulate global ids + +namespace Performance { + +static const unsigned begin_id_size = 256u; +static const unsigned end_id_size = 1u << 22; +static const unsigned id_step = 2u; + +union helper +{ + uint32_t word; + uint8_t byte[4]; +}; + + +template +struct generate_ids +{ + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + typedef Kokkos::View local_id_view; + + local_id_view local_2_global; + + generate_ids( local_id_view & ids) + : local_2_global(ids) + { + Kokkos::parallel_for(local_2_global.dimension_0(), *this); + } + + + KOKKOS_INLINE_FUNCTION + void operator()(size_type i) const + { + + helper x = {static_cast(i)}; + + // shuffle the bytes of i to create a unique, semi-random global_id + x.word = ~x.word; + + uint8_t tmp = x.byte[3]; + x.byte[3] = x.byte[1]; + x.byte[1] = tmp; + + tmp = x.byte[2]; + x.byte[2] = x.byte[0]; + x.byte[0] = tmp; + + local_2_global[i] = x.word; + } + +}; + +template +struct fill_map +{ + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + typedef Kokkos::View local_id_view; + typedef Kokkos::UnorderedMap global_id_view; + + global_id_view global_2_local; + local_id_view local_2_global; + + fill_map( global_id_view gIds, local_id_view lIds) + : global_2_local(gIds) , local_2_global(lIds) + { + Kokkos::parallel_for(local_2_global.dimension_0(), *this); + } + + KOKKOS_INLINE_FUNCTION + void operator()(size_type i) const + { + global_2_local.insert( local_2_global[i], i); + } + +}; + +template +struct find_test +{ + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + typedef Kokkos::View local_id_view; + typedef 
Kokkos::UnorderedMap global_id_view; + + global_id_view global_2_local; + local_id_view local_2_global; + + typedef size_t value_type; + + find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors) + : global_2_local(gIds) , local_2_global(lIds) + { + Kokkos::parallel_reduce(local_2_global.dimension_0(), *this, num_errors); + } + + KOKKOS_INLINE_FUNCTION + void init(value_type & v) const + { v = 0; } + + KOKKOS_INLINE_FUNCTION + void join(volatile value_type & dst, volatile value_type const & src) const + { dst += src; } + + KOKKOS_INLINE_FUNCTION + void operator()(size_type i, value_type & num_errors) const + { + uint32_t index = global_2_local.find( local_2_global[i] ); + + if ( global_2_local.value_at(index) != i) ++num_errors; + } + +}; + +template +void test_global_to_local_ids(unsigned num_ids) +{ + + typedef Device execution_space; + typedef typename execution_space::size_type size_type; + + typedef Kokkos::View local_id_view; + typedef Kokkos::UnorderedMap global_id_view; + + //size + std::cout << num_ids << ", "; + + double elasped_time = 0; + Kokkos::Timer timer; + + local_id_view local_2_global("local_ids", num_ids); + global_id_view global_2_local((3u*num_ids)/2u); + + //create + elasped_time = timer.seconds(); + std::cout << elasped_time << ", "; + timer.reset(); + + // generate unique ids + { + generate_ids gen(local_2_global); + } + Device::fence(); + // generate + elasped_time = timer.seconds(); + std::cout << elasped_time << ", "; + timer.reset(); + + { + fill_map fill(global_2_local, local_2_global); + } + Device::fence(); + + // fill + elasped_time = timer.seconds(); + std::cout << elasped_time << ", "; + timer.reset(); + + + size_t num_errors = 0; + for (int i=0; i<100; ++i) + { + find_test find(global_2_local, local_2_global,num_errors); + } + Device::fence(); + + // find + elasped_time = timer.seconds(); + std::cout << elasped_time << std::endl; + + ASSERT_EQ( num_errors, 0u); +} + + +} // namespace Performance + + +#endif 
//KOKKOS_TEST_GLOBAL_TO_LOCAL_IDS_HPP + diff --git a/lib/kokkos/containers/performance_tests/TestMain.cpp b/lib/kokkos/containers/performance_tests/TestMain.cpp new file mode 100644 index 0000000000..f952ab3db5 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestMain.cpp @@ -0,0 +1,50 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +int main(int argc, char *argv[]) { + ::testing::InitGoogleTest(&argc,argv); + return RUN_ALL_TESTS(); +} + diff --git a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp new file mode 100644 index 0000000000..da74d32ac1 --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp @@ -0,0 +1,140 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include + +#include + +#include +#include + +#include + +#include +#include +#include +#include + + +namespace Performance { + +class openmp : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned num_threads = 4; + + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() + * Kokkos::hwloc::get_available_cores_per_numa() + * Kokkos::hwloc::get_available_threads_per_core() + ; + + } + + std::cout << "OpenMP: " << num_threads << std::endl; + + Kokkos::OpenMP::initialize( num_threads ); + + std::cout << "available threads: " << omp_get_max_threads() << std::endl; + } + + static void TearDownTestCase() + { + Kokkos::OpenMP::finalize(); + + omp_set_num_threads(1); + + ASSERT_EQ( 1 , omp_get_max_threads() ); + } +}; + +TEST_F( openmp, dynrankview_perf ) +{ + std::cout << "OpenMP" << std::endl; + std::cout << " DynRankView vs View: Initialization Only " << std::endl; + test_dynrankview_op_perf( 8192 ); +} + +TEST_F( openmp, global_2_local) +{ + std::cout << "OpenMP" << std::endl; + std::cout << "size, create, generate, fill, find" << std::endl; + for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + test_global_to_local_ids(i); +} + +TEST_F( openmp, unordered_map_performance_near) +{ + unsigned num_openmp = 4; + if (Kokkos::hwloc::available()) { + num_openmp = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + std::ostringstream base_file_name; + base_file_name << "openmp-" << num_openmp << "-near"; + Perf::run_performance_tests(base_file_name.str()); +} + +TEST_F( openmp, unordered_map_performance_far) +{ + unsigned num_openmp = 4; + if 
(Kokkos::hwloc::available()) { + num_openmp = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + std::ostringstream base_file_name; + base_file_name << "openmp-" << num_openmp << "-far"; + Perf::run_performance_tests(base_file_name.str()); +} + +} // namespace test + diff --git a/lib/kokkos/containers/performance_tests/TestThreads.cpp b/lib/kokkos/containers/performance_tests/TestThreads.cpp new file mode 100644 index 0000000000..4179b7de4c --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestThreads.cpp @@ -0,0 +1,135 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include + +#include + +#include + +#include +#include + +#include + +#include +#include +#include +#include + +namespace Performance { + +class threads : public ::testing::Test { +protected: + static void SetUpTestCase() + { + std::cout << std::setprecision(5) << std::scientific; + + unsigned num_threads = 4; + + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + + std::cout << "Threads: " << num_threads << std::endl; + + Kokkos::Threads::initialize( num_threads ); + } + + static void TearDownTestCase() + { + Kokkos::Threads::finalize(); + } +}; + +TEST_F( threads, dynrankview_perf ) +{ + std::cout << "Threads" << std::endl; + std::cout << " DynRankView vs View: Initialization Only " << std::endl; + test_dynrankview_op_perf( 8192 ); +} + +TEST_F( threads, global_2_local) +{ + std::cout << "Threads" << std::endl; + std::cout << "size, create, generate, fill, find" << std::endl; + for (unsigned i=Performance::begin_id_size; i<=Performance::end_id_size; i *= Performance::id_step) + test_global_to_local_ids(i); +} + +TEST_F( threads, unordered_map_performance_near) +{ + unsigned num_threads = 4; + if 
(Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + std::ostringstream base_file_name; + base_file_name << "threads-" << num_threads << "-near"; + Perf::run_performance_tests(base_file_name.str()); +} + +TEST_F( threads, unordered_map_performance_far) +{ + unsigned num_threads = 4; + if (Kokkos::hwloc::available()) { + num_threads = Kokkos::hwloc::get_available_numa_count() * + Kokkos::hwloc::get_available_cores_per_numa() * + Kokkos::hwloc::get_available_threads_per_core(); + + } + std::ostringstream base_file_name; + base_file_name << "threads-" << num_threads << "-far"; + Perf::run_performance_tests(base_file_name.str()); +} + +} // namespace Performance + + diff --git a/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp new file mode 100644 index 0000000000..71d1182cbe --- /dev/null +++ b/lib/kokkos/containers/performance_tests/TestUnorderedMapPerformance.hpp @@ -0,0 +1,262 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER + +#ifndef KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP +#define KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP + +#include + +#include +#include +#include +#include +#include + + +namespace Perf { + +template +struct UnorderedMapTest +{ + typedef Device execution_space; + typedef Kokkos::UnorderedMap map_type; + typedef typename map_type::histogram_type histogram_type; + + struct value_type { + uint32_t failed_count; + uint32_t max_list; + }; + + uint32_t capacity; + uint32_t inserts; + uint32_t collisions; + double seconds; + map_type map; + histogram_type histogram; + + UnorderedMapTest( uint32_t arg_capacity, uint32_t arg_inserts, uint32_t arg_collisions) + : capacity(arg_capacity) + , inserts(arg_inserts) + , collisions(arg_collisions) + , seconds(0) + , map(capacity) + , histogram(map.get_histogram()) + { + Kokkos::Timer wall_clock ; + wall_clock.reset(); + + value_type v = {}; + int loop_count = 0; + do { + ++loop_count; + + v = value_type(); + Kokkos::parallel_reduce(inserts, *this, v); + + if (v.failed_count > 0u) { + const uint32_t new_capacity = map.capacity() + ((map.capacity()*3ull)/20u) + v.failed_count/collisions ; + map.rehash( new_capacity ); + } + } while (v.failed_count > 0u); + + seconds = wall_clock.seconds(); + + switch (loop_count) + { + case 1u: std::cout << " \033[0;32m" << loop_count << "\033[0m "; break; + case 2u: std::cout << " \033[1;31m" << loop_count << "\033[0m "; break; + default: std::cout << " \033[0;31m" << loop_count << "\033[0m "; break; + } + std::cout << std::setprecision(2) << std::fixed << std::setw(5) << (1e9*(seconds/(inserts))) << "; " << std::flush; + + histogram.calculate(); + Device::fence(); + } + + void print(std::ostream & metrics_out, std::ostream & length_out, std::ostream & distance_out, std::ostream & block_distance_out) + { + metrics_out << map.capacity() << " , "; + metrics_out << 
inserts/collisions << " , "; + metrics_out << (100.0 * inserts/collisions) / map.capacity() << " , "; + metrics_out << inserts << " , "; + metrics_out << (map.failed_insert() ? "true" : "false") << " , "; + metrics_out << collisions << " , "; + metrics_out << 1e9*(seconds/inserts) << " , "; + metrics_out << seconds << std::endl; + + length_out << map.capacity() << " , "; + length_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + length_out << collisions << " , "; + histogram.print_length(length_out); + + distance_out << map.capacity() << " , "; + distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + distance_out << collisions << " , "; + histogram.print_distance(distance_out); + + block_distance_out << map.capacity() << " , "; + block_distance_out << ((100.0 *inserts/collisions) / map.capacity()) << " , "; + block_distance_out << collisions << " , "; + histogram.print_block_distance(block_distance_out); + } + + + KOKKOS_INLINE_FUNCTION + void init( value_type & v ) const + { + v.failed_count = 0; + v.max_list = 0; + } + + KOKKOS_INLINE_FUNCTION + void join( volatile value_type & dst, const volatile value_type & src ) const + { + dst.failed_count += src.failed_count; + dst.max_list = src.max_list < dst.max_list ? dst.max_list : src.max_list; + } + + KOKKOS_INLINE_FUNCTION + void operator()(uint32_t i, value_type & v) const + { + const uint32_t key = Near ? i/collisions : i%(inserts/collisions); + typename map_type::insert_result result = map.insert(key,i); + v.failed_count += !result.failed() ? 0 : 1; + v.max_list = result.list_position() < v.max_list ? 
v.max_list : result.list_position(); + } + +}; + +//#define KOKKOS_COLLECT_UNORDERED_MAP_METRICS + +template +void run_performance_tests(std::string const & base_file_name) +{ +#if defined(KOKKOS_COLLECT_UNORDERED_MAP_METRICS) + std::string metrics_file_name = base_file_name + std::string("-metrics.csv"); + std::string length_file_name = base_file_name + std::string("-length.csv"); + std::string distance_file_name = base_file_name + std::string("-distance.csv"); + std::string block_distance_file_name = base_file_name + std::string("-block_distance.csv"); + + std::ofstream metrics_out( metrics_file_name.c_str(), std::ofstream::out ); + std::ofstream length_out( length_file_name.c_str(), std::ofstream::out ); + std::ofstream distance_out( distance_file_name.c_str(), std::ofstream::out ); + std::ofstream block_distance_out( block_distance_file_name.c_str(), std::ofstream::out ); + + + /* + const double test_ratios[] = { + 0.50 + , 0.75 + , 0.80 + , 0.85 + , 0.90 + , 0.95 + , 1.00 + , 1.25 + , 2.00 + }; + */ + + const double test_ratios[] = { 1.00 }; + + const int num_ratios = sizeof(test_ratios) / sizeof(double); + + /* + const uint32_t collisions[] { + 1 + , 4 + , 16 + , 64 + }; + */ + + const uint32_t collisions[] = { 16 }; + + const int num_collisions = sizeof(collisions) / sizeof(uint32_t); + + // set up file headers + metrics_out << "Capacity , Unique , Percent Full , Attempted Inserts , Failed Inserts , Collision Ratio , Nanoseconds/Inserts, Seconds" << std::endl; + length_out << "Capacity , Percent Full , "; + distance_out << "Capacity , Percent Full , "; + block_distance_out << "Capacity , Percent Full , "; + + for (int i=0; i<100; ++i) { + length_out << i << " , "; + distance_out << i << " , "; + block_distance_out << i << " , "; + } + + length_out << "\b\b\b " << std::endl; + distance_out << "\b\b\b " << std::endl; + block_distance_out << "\b\b\b " << std::endl; + + Kokkos::Timer wall_clock ; + for (int i=0; i < num_collisions ; ++i) { + wall_clock.reset(); 
+ std::cout << "Collisions: " << collisions[i] << std::endl; + for (int j = 0; j < num_ratios; ++j) { + std::cout << std::setprecision(1) << std::fixed << std::setw(5) << (100.0*test_ratios[j]) << "% " << std::flush; + for (uint32_t capacity = 1<<14; capacity < 1<<25; capacity = capacity << 1) { + uint32_t inserts = static_cast(test_ratios[j]*(capacity)); + std::cout << capacity << std::flush; + UnorderedMapTest test(capacity, inserts*collisions[i], collisions[i]); + Device::fence(); + test.print(metrics_out, length_out, distance_out, block_distance_out); + } + std::cout << "\b\b " << std::endl; + + } + std::cout << " " << wall_clock.seconds() << " secs" << std::endl; + } + metrics_out.close(); + length_out.close(); + distance_out.close(); + block_distance_out.close(); +#else + (void)base_file_name; + std::cout << "skipping test" << std::endl; +#endif +} + + +} // namespace Perf + +#endif //KOKKOS_TEST_UNORDERED_MAP_PERFORMANCE_HPP diff --git a/lib/kokkos/containers/src/CMakeLists.txt b/lib/kokkos/containers/src/CMakeLists.txt new file mode 100644 index 0000000000..da5a791530 --- /dev/null +++ b/lib/kokkos/containers/src/CMakeLists.txt @@ -0,0 +1,31 @@ + +TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h) + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +#----------------------------------------------------------------------------- + +SET(HEADERS "") +SET(SOURCES "") + +SET(HEADERS_IMPL "") + +FILE(GLOB HEADERS *.hpp) +FILE(GLOB HEADERS_IMPL impl/*.hpp) +FILE(GLOB SOURCES impl/*.cpp) + +SET(TRILINOS_INCDIR ${CMAKE_INSTALL_PREFIX}/${${PROJECT_NAME}_INSTALL_INCLUDE_DIR}) + +INSTALL(FILES ${HEADERS_IMPL} DESTINATION ${TRILINOS_INCDIR}/impl/) + +TRIBITS_ADD_LIBRARY( + kokkoscontainers + HEADERS ${HEADERS} + NOINSTALLHEADERS ${HEADERS_IMPL} + SOURCES ${SOURCES} + DEPLIBS + ) + +#----------------------------------------------------------------------------- + diff --git a/lib/kokkos/containers/src/Kokkos_Bitset.hpp 
b/lib/kokkos/containers/src/Kokkos_Bitset.hpp new file mode 100644 index 0000000000..74da5f61b5 --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_Bitset.hpp @@ -0,0 +1,437 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_BITSET_HPP +#define KOKKOS_BITSET_HPP + +#include +#include + +#include + +#include + +namespace Kokkos { + +template +class Bitset; + +template +class ConstBitset; + +template +void deep_copy( Bitset & dst, Bitset const& src); + +template +void deep_copy( Bitset & dst, ConstBitset const& src); + +template +void deep_copy( ConstBitset & dst, ConstBitset const& src); + + +/// A thread safe view to a bitset +template +class Bitset +{ +public: + typedef Device execution_space; + typedef unsigned size_type; + + enum { BIT_SCAN_REVERSE = 1u }; + enum { MOVE_HINT_BACKWARD = 2u }; + + enum { + BIT_SCAN_FORWARD_MOVE_HINT_FORWARD = 0u + , BIT_SCAN_REVERSE_MOVE_HINT_FORWARD = BIT_SCAN_REVERSE + , BIT_SCAN_FORWARD_MOVE_HINT_BACKWARD = MOVE_HINT_BACKWARD + , BIT_SCAN_REVERSE_MOVE_HINT_BACKWARD = BIT_SCAN_REVERSE | MOVE_HINT_BACKWARD + }; + +private: + enum { block_size = static_cast(sizeof(unsigned)*CHAR_BIT) }; + enum { block_mask = block_size-1u }; + enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) }; + +public: + + + /// constructor + /// arg_size := number of bit in set + Bitset(unsigned arg_size = 0u) + : m_size(arg_size) + , m_last_block_mask(0u) + , m_blocks("Bitset", ((m_size + block_mask) >> block_shift) ) + { + for (int i=0, end = static_cast(m_size & block_mask); i < end; ++i) { + m_last_block_mask |= 1u << i; + } + } + + /// assignment + Bitset & operator = (Bitset const & rhs) + { + this->m_size = rhs.m_size; + this->m_last_block_mask = rhs.m_last_block_mask; + this->m_blocks = rhs.m_blocks; + + return *this; + } + + /// copy constructor + Bitset( Bitset const & rhs) + : m_size( rhs.m_size ) + , m_last_block_mask( rhs.m_last_block_mask ) + , m_blocks( rhs.m_blocks ) + {} + + /// number of bits in the set + /// can be call from the host or the device + 
KOKKOS_FORCEINLINE_FUNCTION + unsigned size() const + { return m_size; } + + /// number of bits which are set to 1 + /// can only be called from the host + unsigned count() const + { + Impl::BitsetCount< Bitset > f(*this); + return f.apply(); + } + + /// set all bits to 1 + /// can only be called from the host + void set() + { + Kokkos::deep_copy(m_blocks, ~0u ); + + if (m_last_block_mask) { + //clear the unused bits in the last block + typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy; + raw_deep_copy( m_blocks.ptr_on_device() + (m_blocks.dimension_0() -1u), &m_last_block_mask, sizeof(unsigned)); + } + } + + /// set all bits to 0 + /// can only be called from the host + void reset() + { + Kokkos::deep_copy(m_blocks, 0u ); + } + + /// set all bits to 0 + /// can only be called from the host + void clear() + { + Kokkos::deep_copy(m_blocks, 0u ); + } + + /// set i'th bit to 1 + /// can only be called from the device + KOKKOS_FORCEINLINE_FUNCTION + bool set( unsigned i ) const + { + if ( i < m_size ) { + unsigned * block_ptr = &m_blocks[ i >> block_shift ]; + const unsigned mask = 1u << static_cast( i & block_mask ); + + return !( atomic_fetch_or( block_ptr, mask ) & mask ); + } + return false; + } + + /// set i'th bit to 0 + /// can only be called from the device + KOKKOS_FORCEINLINE_FUNCTION + bool reset( unsigned i ) const + { + if ( i < m_size ) { + unsigned * block_ptr = &m_blocks[ i >> block_shift ]; + const unsigned mask = 1u << static_cast( i & block_mask ); + + return atomic_fetch_and( block_ptr, ~mask ) & mask; + } + return false; + } + + /// return true if the i'th bit set to 1 + /// can only be called from the device + KOKKOS_FORCEINLINE_FUNCTION + bool test( unsigned i ) const + { + if ( i < m_size ) { + const unsigned block = volatile_load(&m_blocks[ i >> block_shift ]); + const unsigned mask = 1u << static_cast( i & block_mask ); + return block & mask; + } + return false; + } + + /// used with 
find_any_set_near or find_any_unset_near functions + /// returns the max number of times those functions should be call + /// when searching for an available bit + KOKKOS_FORCEINLINE_FUNCTION + unsigned max_hint() const + { + return m_blocks.dimension_0(); + } + + /// find a bit set to 1 near the hint + /// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found + /// and if result.first is false the result.second is a new hint + KOKKOS_INLINE_FUNCTION + Kokkos::pair find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const + { + const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0; + const unsigned offset = hint & block_mask; + unsigned block = volatile_load(&m_blocks[ block_idx ]); + block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1)) ? block : block & m_last_block_mask ; + + return find_any_helper(block_idx, offset, block, scan_direction); + } + + /// find a bit set to 0 near the hint + /// returns a pair< bool, unsigned> where if result.first is true then result.second is the bit found + /// and if result.first is false the result.second is a new hint + KOKKOS_INLINE_FUNCTION + Kokkos::pair find_any_unset_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const + { + const unsigned block_idx = hint >> block_shift; + const unsigned offset = hint & block_mask; + unsigned block = volatile_load(&m_blocks[ block_idx ]); + block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1) ) ? 
~block : ~block & m_last_block_mask ; + + return find_any_helper(block_idx, offset, block, scan_direction); + } + +private: + + KOKKOS_FORCEINLINE_FUNCTION + Kokkos::pair find_any_helper(unsigned block_idx, unsigned offset, unsigned block, unsigned scan_direction) const + { + Kokkos::pair result( block > 0u, 0); + + if (!result.first) { + result.second = update_hint( block_idx, offset, scan_direction ); + } + else { + result.second = scan_block( (block_idx << block_shift) + , offset + , block + , scan_direction + ); + } + return result; + } + + + KOKKOS_FORCEINLINE_FUNCTION + unsigned scan_block(unsigned block_start, int offset, unsigned block, unsigned scan_direction ) const + { + offset = !(scan_direction & BIT_SCAN_REVERSE) ? offset : (offset + block_mask) & block_mask; + block = Impl::rotate_right(block, offset); + return ((( !(scan_direction & BIT_SCAN_REVERSE) ? + Impl::bit_scan_forward(block) : + Impl::bit_scan_reverse(block) + ) + offset + ) & block_mask + ) + block_start; + } + + KOKKOS_FORCEINLINE_FUNCTION + unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const + { + block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1; + block_idx = block_idx >= 0 ? block_idx : m_blocks.dimension_0() - 1; + block_idx = block_idx < static_cast(m_blocks.dimension_0()) ? block_idx : 0; + + return static_cast(block_idx)*block_size + offset; + } + +private: + + unsigned m_size; + unsigned m_last_block_mask; + View< unsigned *, execution_space, MemoryTraits > m_blocks; + +private: + template + friend class Bitset; + + template + friend class ConstBitset; + + template + friend struct Impl::BitsetCount; + + template + friend void deep_copy( Bitset & dst, Bitset const& src); + + template + friend void deep_copy( Bitset & dst, ConstBitset const& src); +}; + +/// a thread-safe view to a const bitset +/// i.e. 
can only test bits +template +class ConstBitset +{ +public: + typedef Device execution_space; + typedef unsigned size_type; + +private: + enum { block_size = static_cast(sizeof(unsigned)*CHAR_BIT) }; + enum { block_mask = block_size -1u }; + enum { block_shift = Kokkos::Impl::integral_power_of_two(block_size) }; + +public: + ConstBitset() + : m_size (0) + {} + + ConstBitset(Bitset const& rhs) + : m_size(rhs.m_size) + , m_blocks(rhs.m_blocks) + {} + + ConstBitset(ConstBitset const& rhs) + : m_size( rhs.m_size ) + , m_blocks( rhs.m_blocks ) + {} + + ConstBitset & operator = (Bitset const & rhs) + { + this->m_size = rhs.m_size; + this->m_blocks = rhs.m_blocks; + + return *this; + } + + ConstBitset & operator = (ConstBitset const & rhs) + { + this->m_size = rhs.m_size; + this->m_blocks = rhs.m_blocks; + + return *this; + } + + + KOKKOS_FORCEINLINE_FUNCTION + unsigned size() const + { + return m_size; + } + + unsigned count() const + { + Impl::BitsetCount< ConstBitset > f(*this); + return f.apply(); + } + + KOKKOS_FORCEINLINE_FUNCTION + bool test( unsigned i ) const + { + if ( i < m_size ) { + const unsigned block = m_blocks[ i >> block_shift ]; + const unsigned mask = 1u << static_cast( i & block_mask ); + return block & mask; + } + return false; + } + +private: + + unsigned m_size; + View< const unsigned *, execution_space, MemoryTraits > m_blocks; + +private: + template + friend class ConstBitset; + + template + friend struct Impl::BitsetCount; + + template + friend void deep_copy( Bitset & dst, ConstBitset const& src); + + template + friend void deep_copy( ConstBitset & dst, ConstBitset const& src); +}; + + +template +void deep_copy( Bitset & dst, Bitset const& src) +{ + if (dst.size() != src.size()) { + throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + } + + typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; + raw_deep_copy(dst.m_blocks.ptr_on_device(), 
src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0()); +} + +template +void deep_copy( Bitset & dst, ConstBitset const& src) +{ + if (dst.size() != src.size()) { + throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + } + + typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; + raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0()); +} + +template +void deep_copy( ConstBitset & dst, ConstBitset const& src) +{ + if (dst.size() != src.size()) { + throw std::runtime_error("Error: Cannot deep_copy bitsets of different sizes!"); + } + + typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy; + raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0()); +} + +} // namespace Kokkos + +#endif //KOKKOS_BITSET_HPP diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp new file mode 100644 index 0000000000..1230df4d97 --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp @@ -0,0 +1,982 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Kokkos_DualView.hpp +/// \brief Declaration and definition of Kokkos::DualView. +/// +/// This header file declares and defines Kokkos::DualView and its +/// related nonmember functions. + +#ifndef KOKKOS_DUALVIEW_HPP +#define KOKKOS_DUALVIEW_HPP + +#include +#include + +namespace Kokkos { + +/* \class DualView + * \brief Container to manage mirroring a Kokkos::View that lives + * in device memory with a Kokkos::View that lives in host memory. + * + * This class provides capabilities to manage data which exists in two + * memory spaces at the same time. 
It keeps views of the same layout + * on two memory spaces as well as modified flags for both + * allocations. Users are responsible for setting the modified flags + * manually if they change the data in either memory space, by calling + * the modify() method templated on the device where they modified the + * data. Users may synchronize data by calling the sync() function, + * templated on the device towards which they want to synchronize + * (i.e., the target of the one-way copy operation). + * + * The DualView class also provides convenience methods such as + * realloc, resize and capacity which call the appropriate methods of + * the underlying Kokkos::View objects. + * + * The four template arguments are the same as those of Kokkos::View. + * (Please refer to that class' documentation for a detailed + * description.) + * + * \tparam DataType The type of the entries stored in the container. + * + * \tparam Layout The array's layout in memory. + * + * \tparam Device The Kokkos Device type. If its memory space is + * not the same as the host's memory space, then DualView will + * contain two separate Views: one in device memory, and one in + * host memory. Otherwise, DualView will only store one View. + * + * \tparam MemoryTraits (optional) The user's intended memory access + * behavior. Please see the documentation of Kokkos::View for + * examples. The default suffices for most users. + */ +template< class DataType , + class Arg1Type = void , + class Arg2Type = void , + class Arg3Type = void> +class DualView : public ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > +{ +public: + //! \name Typedefs for device types and various Kokkos::View specializations. + //@{ + typedef ViewTraits< DataType , Arg1Type , Arg2Type, Arg3Type > traits ; + + //! The Kokkos Host Device type; + typedef typename traits::host_mirror_space host_mirror_space ; + + //! The type of a Kokkos::View on the device.
+ typedef View< typename traits::data_type , + Arg1Type , + Arg2Type , + Arg3Type > t_dev ; + + /// \typedef t_host + /// \brief The type of a Kokkos::View host mirror of \c t_dev. + typedef typename t_dev::HostMirror t_host ; + + //! The type of a const View on the device. + //! The type of a Kokkos::View on the device. + typedef View< typename traits::const_data_type , + Arg1Type , + Arg2Type , + Arg3Type > t_dev_const ; + + /// \typedef t_host_const + /// \brief The type of a const View host mirror of \c t_dev_const. + typedef typename t_dev_const::HostMirror t_host_const; + + //! The type of a const, random-access View on the device. + typedef View< typename traits::const_data_type , + typename traits::array_layout , + typename traits::device_type , + Kokkos::MemoryTraits > t_dev_const_randomread ; + + /// \typedef t_host_const_randomread + /// \brief The type of a const, random-access View host mirror of + /// \c t_dev_const_randomread. + typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread; + + //! The type of an unmanaged View on the device. + typedef View< typename traits::data_type , + typename traits::array_layout , + typename traits::device_type , + MemoryUnmanaged> t_dev_um; + + //! The type of an unmanaged View host mirror of \c t_dev_um. + typedef View< typename t_host::data_type , + typename t_host::array_layout , + typename t_host::device_type , + MemoryUnmanaged> t_host_um; + + //! The type of a const unmanaged View on the device. + typedef View< typename traits::const_data_type , + typename traits::array_layout , + typename traits::device_type , + MemoryUnmanaged> t_dev_const_um; + + //! The type of a const unmanaged View host mirror of \c t_dev_const_um. + typedef View t_host_const_um; + + //! The type of a const, random-access View on the device. 
+ typedef View< typename t_host::const_data_type , + typename t_host::array_layout , + typename t_host::device_type , + Kokkos::MemoryTraits > t_dev_const_randomread_um ; + + /// \typedef t_host_const_randomread_um + /// \brief The type of a const, random-access View host mirror of + /// \c t_dev_const_randomread. + typedef typename t_dev_const_randomread::HostMirror t_host_const_randomread_um; + + //@} + //! \name The two View instances. + //@{ + + t_dev d_view; + t_host h_view; + + //@} + //! \name Counters to keep track of changes ("modified" flags) + //@{ + + View modified_device; + View modified_host; + + //@} + //! \name Constructors + //@{ + + /// \brief Empty constructor. + /// + /// Both device and host View objects are constructed using their + /// default constructors. The "modified" flags are both initialized + /// to "unmodified." + DualView () : + modified_device (View ("DualView::modified_device")), + modified_host (View ("DualView::modified_host")) + {} + + /// \brief Constructor that allocates View objects on both host and device. + /// + /// This constructor works like the analogous constructor of View. + /// The first argument is a string label, which is entirely for your + /// benefit. (Different DualView objects may have the same label if + /// you like.) The arguments that follow are the dimensions of the + /// View objects. For example, if the View has three dimensions, + /// the first three integer arguments will be nonzero, and you may + /// omit the integer arguments that follow. + DualView (const std::string& label, + const size_t n0 = 0, + const size_t n1 = 0, + const size_t n2 = 0, + const size_t n3 = 0, + const size_t n4 = 0, + const size_t n5 = 0, + const size_t n6 = 0, + const size_t n7 = 0) + : d_view (label, n0, n1, n2, n3, n4, n5, n6, n7) + , h_view (create_mirror_view (d_view)) // without UVM, host View mirrors + , modified_device (View ("DualView::modified_device")) + , modified_host (View ("DualView::modified_host")) + {} + + //!
Copy constructor (shallow copy) + template + DualView (const DualView& src) : + d_view (src.d_view), + h_view (src.h_view), + modified_device (src.modified_device), + modified_host (src.modified_host) + {} + + //! Subview constructor + template< class SD, class S1 , class S2 , class S3 + , class Arg0 , class ... Args > + DualView( const DualView & src + , const Arg0 & arg0 + , Args ... args + ) + : d_view( Kokkos::subview( src.d_view , arg0 , args ... ) ) + , h_view( Kokkos::subview( src.h_view , arg0 , args ... ) ) + , modified_device (src.modified_device) + , modified_host (src.modified_host) + {} + + /// \brief Create DualView from existing device and host View objects. + /// + /// This constructor assumes that the device and host View objects + /// are synchronized. You, the caller, are responsible for making + /// sure this is the case before calling this constructor. After + /// this constructor returns, you may use DualView's sync() and + /// modify() methods to ensure synchronization of the View objects. + /// + /// \param d_view_ Device View + /// \param h_view_ Host View (must have type t_host = t_dev::HostMirror) + DualView (const t_dev& d_view_, const t_host& h_view_) : + d_view (d_view_), + h_view (h_view_), + modified_device (View ("DualView::modified_device")), + modified_host (View ("DualView::modified_host")) + { +#if ! 
KOKKOS_USING_EXP_VIEW + Impl::assert_shapes_are_equal (d_view.shape (), h_view.shape ()); +#else + if ( int(d_view.rank) != int(h_view.rank) || + d_view.dimension_0() != h_view.dimension_0() || + d_view.dimension_1() != h_view.dimension_1() || + d_view.dimension_2() != h_view.dimension_2() || + d_view.dimension_3() != h_view.dimension_3() || + d_view.dimension_4() != h_view.dimension_4() || + d_view.dimension_5() != h_view.dimension_5() || + d_view.dimension_6() != h_view.dimension_6() || + d_view.dimension_7() != h_view.dimension_7() || + d_view.stride_0() != h_view.stride_0() || + d_view.stride_1() != h_view.stride_1() || + d_view.stride_2() != h_view.stride_2() || + d_view.stride_3() != h_view.stride_3() || + d_view.stride_4() != h_view.stride_4() || + d_view.stride_5() != h_view.stride_5() || + d_view.stride_6() != h_view.stride_6() || + d_view.stride_7() != h_view.stride_7() || + d_view.span() != h_view.span() ) { + Kokkos::Impl::throw_runtime_exception("DualView constructed with incompatible views"); + } +#endif + } + + //@} + //! \name Methods for synchronizing, marking as modified, and getting Views. + //@{ + + /// \brief Return a View on a specific device \c Device. + /// + /// Please don't be afraid of the if_c expression in the return + /// value's type. That just tells the method what the return type + /// should be: t_dev if the \c Device template parameter matches + /// this DualView's device type, else t_host. 
+ /// + /// For example, suppose you create a DualView on Cuda, like this: + /// \code + /// typedef Kokkos::DualView<float*, Kokkos::LayoutRight, Kokkos::Cuda> dual_view_type; + /// dual_view_type DV ("my dual view", 100); + /// \endcode + /// If you want to get the CUDA device View, do this: + /// \code + /// typename dual_view_type::t_dev cudaView = DV.view<Kokkos::Cuda> (); + /// \endcode + /// and if you want to get the host mirror of that View, do this: + /// \code + /// typedef typename Kokkos::HostSpace::execution_space host_device_type; + /// typename dual_view_type::t_host hostView = DV.view<host_device_type> (); + /// \endcode + template< class Device > + KOKKOS_INLINE_FUNCTION + const typename Impl::if_c< + Impl::is_same::value, + t_dev, + t_host>::type& view () const + { + return Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value, + t_dev, + t_host >::select (d_view , h_view); + } + + /// \brief Update data on device or host only if data in the other + /// space has been marked as modified. + /// + /// If \c Device is the same as this DualView's device type, then + /// copy data from host to device. Otherwise, copy data from device + /// to host. In either case, only copy if the source of the copy + /// has been modified. + /// + /// This is a one-way synchronization only. If the target of the + /// copy has been modified, this operation will discard those + /// modifications. It will also reset both device and host modified + /// flags. + /// + /// \note This method doesn't know on its own whether you modified + /// the data in either View. You must manually mark modified data + /// as modified, by calling the modify() method with the + /// appropriate template parameter.
+ template + void sync( const typename Impl::enable_if< + ( Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value) || + ( Impl::is_same< Device , int>::value) + , int >::type& = 0) + { + const unsigned int dev = + Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value , + unsigned int, + unsigned int>::select (1, 0); + + if (dev) { // if Device is the same as DualView's device type + if ((modified_host () > 0) && (modified_host () >= modified_device ())) { + deep_copy (d_view, h_view); + modified_host() = modified_device() = 0; + } + } else { // hopefully Device is the same as DualView's host type + if ((modified_device () > 0) && (modified_device () >= modified_host ())) { + deep_copy (h_view, d_view); + modified_host() = modified_device() = 0; + } + } + if(Impl::is_same::value) { + t_dev::execution_space::fence(); + t_host::execution_space::fence(); + } + } + + template + void sync ( const typename Impl::enable_if< + ( ! 
Impl::is_same< typename traits::data_type , typename traits::non_const_data_type>::value ) || + ( Impl::is_same< Device , int>::value) + , int >::type& = 0 ) + { + const unsigned int dev = + Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value, + unsigned int, + unsigned int>::select (1, 0); + if (dev) { // if Device is the same as DualView's device type + if ((modified_host () > 0) && (modified_host () >= modified_device ())) { + Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype."); + } + } else { // hopefully Device is the same as DualView's host type + if ((modified_device () > 0) && (modified_device () >= modified_host ())) { + Impl::throw_runtime_exception("Calling sync on a DualView with a const datatype."); + } + } + } + + template + bool need_sync() const + { + const unsigned int dev = + Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value , + unsigned int, + unsigned int>::select (1, 0); + + if (dev) { // if Device is the same as DualView's device type + if ((modified_host () > 0) && (modified_host () >= modified_device ())) { + return true; + } + } else { // hopefully Device is the same as DualView's host type + if ((modified_device () > 0) && (modified_device () >= modified_host ())) { + return true; + } + } + return false; + } + /// \brief Mark data as modified on the given device \c Device. + /// + /// If \c Device is the same as this DualView's device type, then + /// mark the device's data as modified. Otherwise, mark the host's + /// data as modified. + template + void modify () { + const unsigned int dev = + Impl::if_c< + Impl::is_same< + typename t_dev::memory_space, + typename Device::memory_space>::value, + unsigned int, + unsigned int>::select (1, 0); + + if (dev) { // if Device is the same as DualView's device type + // Increment the device's modified count. 
+ modified_device () = (modified_device () > modified_host () ? + modified_device () : modified_host ()) + 1; + } else { // hopefully Device is the same as DualView's host type + // Increment the host's modified count. + modified_host () = (modified_device () > modified_host () ? + modified_device () : modified_host ()) + 1; + } + } + + //@} + //! \name Methods for reallocating or resizing the View objects. + //@{ + + /// \brief Reallocate both View objects. + /// + /// This discards any existing contents of the objects, and resets + /// their modified flags. It does not copy the old contents + /// of either View into the new View objects. + void realloc( const size_t n0 = 0 , + const size_t n1 = 0 , + const size_t n2 = 0 , + const size_t n3 = 0 , + const size_t n4 = 0 , + const size_t n5 = 0 , + const size_t n6 = 0 , + const size_t n7 = 0 ) { + ::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7); + h_view = create_mirror_view( d_view ); + + /* Reset dirty flags */ + modified_device() = modified_host() = 0; + } + + /// \brief Resize both views, copying old contents into new if necessary. + /// + /// This method only copies the old contents into the new View + /// objects for the device which was last marked as modified. 
+ void resize( const size_t n0 = 0 , + const size_t n1 = 0 , + const size_t n2 = 0 , + const size_t n3 = 0 , + const size_t n4 = 0 , + const size_t n5 = 0 , + const size_t n6 = 0 , + const size_t n7 = 0 ) { + if(modified_device() >= modified_host()) { + /* Resize on Device */ + ::Kokkos::resize(d_view,n0,n1,n2,n3,n4,n5,n6,n7); + h_view = create_mirror_view( d_view ); + + /* Mark Device copy as modified */ + modified_device() = modified_device()+1; + + } else { + /* Realloc on Device */ + + ::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7); + t_host temp_view = create_mirror_view( d_view ); + + /* Remap on Host */ + Kokkos::deep_copy( temp_view , h_view ); + + h_view = temp_view; + + /* Mark Host copy as modified */ + modified_host() = modified_host()+1; + } + } + + //@} + //! \name Methods for getting capacity, stride, or dimension(s). + //@{ + + //! The allocation size (same as Kokkos::View::capacity). + size_t capacity() const { +#if KOKKOS_USING_EXP_VIEW + return d_view.span(); +#else + return d_view.capacity(); +#endif + } + + //! Get stride(s) for each dimension. 
+ template< typename iType> + void stride(iType* stride_) const { + d_view.stride(stride_); + } + + /* \brief return size of dimension 0 */ + size_t dimension_0() const {return d_view.dimension_0();} + /* \brief return size of dimension 1 */ + size_t dimension_1() const {return d_view.dimension_1();} + /* \brief return size of dimension 2 */ + size_t dimension_2() const {return d_view.dimension_2();} + /* \brief return size of dimension 3 */ + size_t dimension_3() const {return d_view.dimension_3();} + /* \brief return size of dimension 4 */ + size_t dimension_4() const {return d_view.dimension_4();} + /* \brief return size of dimension 5 */ + size_t dimension_5() const {return d_view.dimension_5();} + /* \brief return size of dimension 6 */ + size_t dimension_6() const {return d_view.dimension_6();} + /* \brief return size of dimension 7 */ + size_t dimension_7() const {return d_view.dimension_7();} + + //@} +}; + +} // namespace Kokkos + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// +// Partial specializations of Kokkos::subview() for DualView objects. +// + +#if KOKKOS_USING_EXP_VIEW + +namespace Kokkos { +namespace Impl { + +template< class D, class A1, class A2, class A3, class ... Args > +struct DualViewSubview { + + typedef typename Kokkos::Experimental::Impl::ViewMapping + < void + , Kokkos::ViewTraits< D, A1, A2, A3 > + , Args ... + >::traits_type dst_traits ; + + typedef Kokkos::DualView + < typename dst_traits::data_type + , typename dst_traits::array_layout + , typename dst_traits::device_type + , typename dst_traits::memory_traits + > type ; +}; + +} /* namespace Impl */ + + +template< class D , class A1 , class A2 , class A3 , class ... Args > +typename Impl::DualViewSubview::type +subview( const DualView & src , Args ... args ) +{ + return typename + Impl::DualViewSubview::type( src , args ... 
); +} + +} /* namespace Kokkos */ + +#else + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- +// +// Partial specializations of Kokkos::subview() for DualView objects. +// + +namespace Kokkos { +namespace Impl { + +template< class SrcDataType , class SrcArg1Type , class SrcArg2Type , class SrcArg3Type + , class SubArg0_type , class SubArg1_type , class SubArg2_type , class SubArg3_type + , class SubArg4_type , class SubArg5_type , class SubArg6_type , class SubArg7_type + > +struct ViewSubview< DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > + , SubArg0_type , SubArg1_type , SubArg2_type , SubArg3_type + , SubArg4_type , SubArg5_type , SubArg6_type , SubArg7_type > +{ +private: + + typedef DualView< SrcDataType , SrcArg1Type , SrcArg2Type , SrcArg3Type > SrcViewType ; + + enum { V0 = Impl::is_same< SubArg0_type , void >::value ? 1 : 0 }; + enum { V1 = Impl::is_same< SubArg1_type , void >::value ? 1 : 0 }; + enum { V2 = Impl::is_same< SubArg2_type , void >::value ? 1 : 0 }; + enum { V3 = Impl::is_same< SubArg3_type , void >::value ? 1 : 0 }; + enum { V4 = Impl::is_same< SubArg4_type , void >::value ? 1 : 0 }; + enum { V5 = Impl::is_same< SubArg5_type , void >::value ? 1 : 0 }; + enum { V6 = Impl::is_same< SubArg6_type , void >::value ? 1 : 0 }; + enum { V7 = Impl::is_same< SubArg7_type , void >::value ? 1 : 0 }; + + // The source view rank must be equal to the input argument rank + // Once a void argument is encountered all subsequent arguments must be void. + enum { InputRank = + Impl::StaticAssert<( SrcViewType::rank == + ( V0 ? 0 : ( + V1 ? 1 : ( + V2 ? 2 : ( + V3 ? 3 : ( + V4 ? 4 : ( + V5 ? 5 : ( + V6 ? 6 : ( + V7 ? 7 : 8 ))))))) )) + && + ( SrcViewType::rank == + ( 8 - ( V0 + V1 + V2 + V3 + V4 + V5 + V6 + V7 ) ) ) + >::value ? SrcViewType::rank : 0 }; + + enum { R0 = Impl::ViewOffsetRange< SubArg0_type >::is_range ? 
1 : 0 }; + enum { R1 = Impl::ViewOffsetRange< SubArg1_type >::is_range ? 1 : 0 }; + enum { R2 = Impl::ViewOffsetRange< SubArg2_type >::is_range ? 1 : 0 }; + enum { R3 = Impl::ViewOffsetRange< SubArg3_type >::is_range ? 1 : 0 }; + enum { R4 = Impl::ViewOffsetRange< SubArg4_type >::is_range ? 1 : 0 }; + enum { R5 = Impl::ViewOffsetRange< SubArg5_type >::is_range ? 1 : 0 }; + enum { R6 = Impl::ViewOffsetRange< SubArg6_type >::is_range ? 1 : 0 }; + enum { R7 = Impl::ViewOffsetRange< SubArg7_type >::is_range ? 1 : 0 }; + + enum { OutputRank = unsigned(R0) + unsigned(R1) + unsigned(R2) + unsigned(R3) + + unsigned(R4) + unsigned(R5) + unsigned(R6) + unsigned(R7) }; + + // Reverse + enum { R0_rev = 0 == InputRank ? 0u : ( + 1 == InputRank ? unsigned(R0) : ( + 2 == InputRank ? unsigned(R1) : ( + 3 == InputRank ? unsigned(R2) : ( + 4 == InputRank ? unsigned(R3) : ( + 5 == InputRank ? unsigned(R4) : ( + 6 == InputRank ? unsigned(R5) : ( + 7 == InputRank ? unsigned(R6) : unsigned(R7) ))))))) }; + + typedef typename SrcViewType::array_layout SrcViewLayout ; + + // Choose array layout, attempting to preserve original layout if at all possible. + typedef typename Impl::if_c< + ( // Same Layout IF + // OutputRank 0 + ( OutputRank == 0 ) + || + // OutputRank 1 or 2, InputLayout Left, Interval 0 + // because single stride one or second index has a stride. + ( OutputRank <= 2 && R0 && Impl::is_same::value ) + || + // OutputRank 1 or 2, InputLayout Right, Interval [InputRank-1] + // because single stride one or second index has a stride. + ( OutputRank <= 2 && R0_rev && Impl::is_same::value ) + ), SrcViewLayout , Kokkos::LayoutStride >::type OutputViewLayout ; + + // Choose data type as a purely dynamic rank array to accommodate a runtime range.
+ typedef typename Impl::if_c< OutputRank == 0 , typename SrcViewType::value_type , + typename Impl::if_c< OutputRank == 1 , typename SrcViewType::value_type *, + typename Impl::if_c< OutputRank == 2 , typename SrcViewType::value_type **, + typename Impl::if_c< OutputRank == 3 , typename SrcViewType::value_type ***, + typename Impl::if_c< OutputRank == 4 , typename SrcViewType::value_type ****, + typename Impl::if_c< OutputRank == 5 , typename SrcViewType::value_type *****, + typename Impl::if_c< OutputRank == 6 , typename SrcViewType::value_type ******, + typename Impl::if_c< OutputRank == 7 , typename SrcViewType::value_type *******, + typename SrcViewType::value_type ******** + >::type >::type >::type >::type >::type >::type >::type >::type OutputData ; + + // Choose space. + // If the source view's template arg1 or arg2 is a space then use it, + // otherwise use the source view's execution space. + + typedef typename Impl::if_c< Impl::is_space< SrcArg1Type >::value , SrcArg1Type , + typename Impl::if_c< Impl::is_space< SrcArg2Type >::value , SrcArg2Type , typename SrcViewType::execution_space + >::type >::type OutputSpace ; + +public: + + // If keeping the layout then match non-data type arguments + // else keep execution space and memory traits. 
+ typedef typename + Impl::if_c< Impl::is_same< SrcViewLayout , OutputViewLayout >::value + , Kokkos::DualView< OutputData , SrcArg1Type , SrcArg2Type , SrcArg3Type > + , Kokkos::DualView< OutputData , OutputViewLayout , OutputSpace + , typename SrcViewType::memory_traits > + >::type type ; +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +namespace Kokkos { + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 > +typename Impl::ViewSubview< DualView + , ArgType0 , void , void , void + , void , void , void , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , void , void , void + , void , void , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0); + sub_view.h_view = subview(src.h_view,arg0); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , void , void + , void , void , void , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , void , void + , void , void , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1); + sub_view.h_view = subview(src.h_view,arg0,arg1); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , void + , void , void , void , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const 
ArgType2 & arg2 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , void + , void , void , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , void , void , void , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , void , void , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , + class ArgType4 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , void , void , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 , + const ArgType4 & arg4 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , void , void ,void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4); + 
sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , + class ArgType4 , class ArgType5 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , void , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 , + const ArgType4 & arg4 , + const ArgType5 & arg5 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , void , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , + class ArgType4 , class ArgType5 , class ArgType6 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , ArgType6 , void + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 , + const ArgType4 & arg4 , + const ArgType5 & arg5 , + const ArgType6 & arg6 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , ArgType6 , void + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = 
src.modified_host; + return sub_view; +} + +template< class D , class A1 , class A2 , class A3 , + class ArgType0 , class ArgType1 , class ArgType2 , class ArgType3 , + class ArgType4 , class ArgType5 , class ArgType6 , class ArgType7 > +typename Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , ArgType6 , ArgType7 + >::type +subview( const DualView & src , + const ArgType0 & arg0 , + const ArgType1 & arg1 , + const ArgType2 & arg2 , + const ArgType3 & arg3 , + const ArgType4 & arg4 , + const ArgType5 & arg5 , + const ArgType6 & arg6 , + const ArgType7 & arg7 ) +{ + typedef typename + Impl::ViewSubview< DualView + , ArgType0 , ArgType1 , ArgType2 , ArgType3 + , ArgType4 , ArgType5 , ArgType6 , ArgType7 + >::type + DstViewType ; + DstViewType sub_view; + sub_view.d_view = subview(src.d_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7); + sub_view.h_view = subview(src.h_view,arg0,arg1,arg2,arg3,arg4,arg5,arg6,arg7); + sub_view.modified_device = src.modified_device; + sub_view.modified_host = src.modified_host; + return sub_view; +} + +} // namespace Kokkos + +#endif /* KOKKOS_USING_EXP_VIEW */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { + +// +// Partial specialization of Kokkos::deep_copy() for DualView objects. 
+// + +template< class DT , class DL , class DD , class DM , + class ST , class SL , class SD , class SM > +void +deep_copy (DualView dst, // trust me, this must not be a reference + const DualView& src ) +{ + if (src.modified_device () >= src.modified_host ()) { + deep_copy (dst.d_view, src.d_view); + dst.template modify::device_type> (); + } else { + deep_copy (dst.h_view, src.h_view); + dst.template modify::host_mirror_space> (); + } +} + +template< class ExecutionSpace , + class DT , class DL , class DD , class DM , + class ST , class SL , class SD , class SM > +void +deep_copy (const ExecutionSpace& exec , + DualView dst, // trust me, this must not be a reference + const DualView& src ) +{ + if (src.modified_device () >= src.modified_host ()) { + deep_copy (exec, dst.d_view, src.d_view); + dst.template modify::device_type> (); + } else { + deep_copy (exec, dst.h_view, src.h_view); + dst.template modify::host_mirror_space> (); + } +} + +} // namespace Kokkos + +#endif diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp new file mode 100644 index 0000000000..f72277700a --- /dev/null +++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp @@ -0,0 +1,1834 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +/// \file Kokkos_DynRankView.hpp +/// \brief Declaration and definition of Kokkos::Experimental::DynRankView. +/// +/// This header file declares and defines Kokkos::Experimental::DynRankView and its +/// related nonmember functions. +/* + * Changes from View + * 1. The rank of the DynRankView is returned by the method rank() + * 2. Max rank of a DynRankView is 7 + * 3. subview name is subdynrankview + * 4. Every subdynrankview is returned with LayoutStride + * + * NEW: Redesigned DynRankView + * 5. subview function name now available + * 6. Copy and Copy-Assign View to DynRankView + * 7. 
deep_copy between Views and DynRankViews + * 8. rank( view ); returns the rank of View or DynRankView + */ + +#ifndef KOKKOS_DYNRANKVIEW_HPP +#define KOKKOS_DYNRANKVIEW_HPP + +#include +#include +#include + +namespace Kokkos { +namespace Experimental { + +template< typename DataType , class ... Properties > +class DynRankView; //forward declare + +namespace Impl { + +template +struct DynRankDimTraits { + + enum : size_t{unspecified = ~size_t(0)}; + + // Compute the rank of the view from the dimension arguments that are not 'unspecified' (N7 is not examined). + KOKKOS_INLINE_FUNCTION + static size_t computeRank( const size_t N0 + , const size_t N1 + , const size_t N2 + , const size_t N3 + , const size_t N4 + , const size_t N5 + , const size_t N6 + , const size_t N7 ) + { + return + ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified && N1 == unspecified && N0 == unspecified) ? 0 + : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified && N1 == unspecified) ? 1 + : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified && N2 == unspecified) ? 2 + : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified && N3 == unspecified) ? 3 + : ( (N6 == unspecified && N5 == unspecified && N4 == unspecified) ? 4 + : ( (N6 == unspecified && N5 == unspecified) ? 5 + : ( (N6 == unspecified) ? 6 + : 7 ) ) ) ) ) ) ); + } + + // Compute the rank of the view from the layout dimensions that are not 'unspecified'. + template + KOKKOS_INLINE_FUNCTION + static size_t computeRank( const Layout& layout ) + { + return computeRank( layout.dimension[0] + , layout.dimension[1] + , layout.dimension[2] + , layout.dimension[3] + , layout.dimension[4] + , layout.dimension[5] + , layout.dimension[6] + , layout.dimension[7] ); + } + + // Create the layout for the rank-7 view. 
+ // Non-strided Layout + template + KOKKOS_INLINE_FUNCTION + static typename std::enable_if< (std::is_same::value || std::is_same::value) , Layout >::type createLayout( const Layout& layout ) + { + return Layout( layout.dimension[0] != unspecified ? layout.dimension[0] : 1 + , layout.dimension[1] != unspecified ? layout.dimension[1] : 1 + , layout.dimension[2] != unspecified ? layout.dimension[2] : 1 + , layout.dimension[3] != unspecified ? layout.dimension[3] : 1 + , layout.dimension[4] != unspecified ? layout.dimension[4] : 1 + , layout.dimension[5] != unspecified ? layout.dimension[5] : 1 + , layout.dimension[6] != unspecified ? layout.dimension[6] : 1 + , layout.dimension[7] != unspecified ? layout.dimension[7] : 1 + ); + } + + // LayoutStride + template + KOKKOS_INLINE_FUNCTION + static typename std::enable_if< (std::is_same::value) , Layout>::type createLayout( const Layout& layout ) + { + return Layout( layout.dimension[0] != unspecified ? layout.dimension[0] : 1 + , layout.stride[0] + , layout.dimension[1] != unspecified ? layout.dimension[1] : 1 + , layout.stride[1] + , layout.dimension[2] != unspecified ? layout.dimension[2] : 1 + , layout.stride[2] + , layout.dimension[3] != unspecified ? layout.dimension[3] : 1 + , layout.stride[3] + , layout.dimension[4] != unspecified ? layout.dimension[4] : 1 + , layout.stride[4] + , layout.dimension[5] != unspecified ? layout.dimension[5] : 1 + , layout.stride[5] + , layout.dimension[6] != unspecified ? layout.dimension[6] : 1 + , layout.stride[6] + , layout.dimension[7] != unspecified ? layout.dimension[7] : 1 + , layout.stride[7] + ); + } + + // Create a view from the given dimension arguments. + // This is only necessary because the shmem constructor doesn't take a layout. 
+ template + static ViewType createView( const ViewArg& arg + , const size_t N0 + , const size_t N1 + , const size_t N2 + , const size_t N3 + , const size_t N4 + , const size_t N5 + , const size_t N6 + , const size_t N7 ) + { + return ViewType( arg + , N0 != unspecified ? N0 : 1 + , N1 != unspecified ? N1 : 1 + , N2 != unspecified ? N2 : 1 + , N3 != unspecified ? N3 : 1 + , N4 != unspecified ? N4 : 1 + , N5 != unspecified ? N5 : 1 + , N6 != unspecified ? N6 : 1 + , N7 != unspecified ? N7 : 1 ); + } +}; + + // Non-strided Layout + template + KOKKOS_INLINE_FUNCTION + static typename std::enable_if< (std::is_same::value || std::is_same::value) && std::is_integral::value , Layout >::type reconstructLayout( const Layout& layout , iType dynrank ) + { + return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0) + , dynrank > 1 ? layout.dimension[1] : ~size_t(0) + , dynrank > 2 ? layout.dimension[2] : ~size_t(0) + , dynrank > 3 ? layout.dimension[3] : ~size_t(0) + , dynrank > 4 ? layout.dimension[4] : ~size_t(0) + , dynrank > 5 ? layout.dimension[5] : ~size_t(0) + , dynrank > 6 ? layout.dimension[6] : ~size_t(0) + , dynrank > 7 ? layout.dimension[7] : ~size_t(0) + ); + } + + // LayoutStride + template + KOKKOS_INLINE_FUNCTION + static typename std::enable_if< (std::is_same::value) && std::is_integral::value , Layout >::type reconstructLayout( const Layout& layout , iType dynrank ) + { + return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0) + , dynrank > 0 ? layout.stride[0] : (0) + , dynrank > 1 ? layout.dimension[1] : ~size_t(0) + , dynrank > 1 ? layout.stride[1] : (0) + , dynrank > 2 ? layout.dimension[2] : ~size_t(0) + , dynrank > 2 ? layout.stride[2] : (0) + , dynrank > 3 ? layout.dimension[3] : ~size_t(0) + , dynrank > 3 ? layout.stride[3] : (0) + , dynrank > 4 ? layout.dimension[4] : ~size_t(0) + , dynrank > 4 ? layout.stride[4] : (0) + , dynrank > 5 ? layout.dimension[5] : ~size_t(0) + , dynrank > 5 ? layout.stride[5] : (0) + , dynrank > 6 ? 
layout.dimension[6] : ~size_t(0) + , dynrank > 6 ? layout.stride[6] : (0) + , dynrank > 7 ? layout.dimension[7] : ~size_t(0) + , dynrank > 7 ? layout.stride[7] : (0) + ); + } + + template < typename DynRankViewType , typename iType > + void verify_dynrankview_rank ( iType N , const DynRankViewType &drv ) + { + if ( static_cast(drv.rank()) > N ) + { + Kokkos::abort( "Need at least rank arguments to the operator()" ); + } + } + + +/** \brief Assign compatible default mappings */ +struct ViewToDynRankViewTag {}; + +template< class DstTraits , class SrcTraits > +class ViewMapping< DstTraits , SrcTraits , + typename std::enable_if<( + std::is_same< typename DstTraits::memory_space , typename SrcTraits::memory_space >::value + && + std::is_same< typename DstTraits::specialize , void >::value + && + std::is_same< typename SrcTraits::specialize , void >::value + && + ( + std::is_same< typename DstTraits::array_layout , typename SrcTraits::array_layout >::value + || + ( + ( + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutLeft >::value || + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutRight >::value || + std::is_same< typename DstTraits::array_layout , Kokkos::LayoutStride >::value + ) + && + ( + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutLeft >::value || + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutRight >::value || + std::is_same< typename SrcTraits::array_layout , Kokkos::LayoutStride >::value + ) + ) + ) + ) , ViewToDynRankViewTag >::type > +{ +private: + + enum { is_assignable_value_type = + std::is_same< typename DstTraits::value_type + , typename SrcTraits::value_type >::value || + std::is_same< typename DstTraits::value_type + , typename SrcTraits::const_value_type >::value }; + + enum { is_assignable_layout = + std::is_same< typename DstTraits::array_layout + , typename SrcTraits::array_layout >::value || + std::is_same< typename DstTraits::array_layout + , Kokkos::LayoutStride >::value + }; 
+ +public: + + enum { is_assignable = is_assignable_value_type && + is_assignable_layout }; + + typedef ViewMapping< DstTraits , void > DstType ; + typedef ViewMapping< SrcTraits , void > SrcType ; + + template < typename DT , typename ... DP , typename ST , typename ... SP > + KOKKOS_INLINE_FUNCTION + static void assign( Kokkos::Experimental::DynRankView< DT , DP...> & dst , const Kokkos::View< ST , SP... > & src ) + { + static_assert( is_assignable_value_type + , "View assignment must have same value type or const = non-const" ); + + static_assert( is_assignable_layout + , "View assignment must have compatible layout or have rank <= 1" ); + + // Removed dimension checks... + + typedef typename DstType::offset_type dst_offset_type ; + dst.m_map.m_offset = dst_offset_type(std::integral_constant() , src.layout() ); //Check this for integer input1 for padding, etc + dst.m_map.m_handle = Kokkos::Experimental::Impl::ViewDataHandle< DstTraits >::assign( src.m_map.m_handle , src.m_track ); + dst.m_track.assign( src.m_track , DstTraits::is_managed ); + dst.m_rank = src.Rank ; + } +}; + +} //end Impl + +/* \class DynRankView + * \brief Container that creates a Kokkos view with rank determined at runtime. + * Essentially this is a rank 7 view that wraps the access operators + * to yield the functionality of a view + * + * Changes from View + * 1. The rank of the DynRankView is returned by the method rank() + * 2. Max rank of a DynRankView is 7 + * 3. subview name is subdynrankview + * 4. Every subdynrankview is returned with LayoutStride + * + * NEW: Redesigned DynRankView + * 5. subview function name now available + * 6. Copy and Copy-Assign View to DynRankView + * 7. deep_copy between Views and DynRankViews + * 8. rank( view ); returns the rank of View or DynRankView + * + */ + +template< class > struct is_dyn_rank_view : public std::false_type {}; + +template< class D, class ... 
P > +struct is_dyn_rank_view< Kokkos::Experimental::DynRankView > : public std::true_type {}; + + +template< typename DataType , class ... Properties > +class DynRankView : public ViewTraits< DataType , Properties ... > +{ + static_assert( !std::is_array::value && !std::is_pointer::value , "Cannot template DynRankView with array or pointer datatype - must be pod" ); + +private: + template < class , class ... > friend class DynRankView ; +// template < class , class ... > friend class Kokkos::Experimental::View ; //unnecessary now... + template < class , class ... > friend class Impl::ViewMapping ; + +public: + typedef ViewTraits< DataType , Properties ... > drvtraits ; + + typedef View< DataType******* , Properties...> view_type ; + + typedef ViewTraits< DataType******* , Properties ... > traits ; + + +private: + typedef Kokkos::Experimental::Impl::ViewMapping< traits , void > map_type ; + typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ; + + track_type m_track ; + map_type m_map ; + unsigned m_rank; + +public: + KOKKOS_INLINE_FUNCTION + view_type & DownCast() const { return ( view_type & ) (*this); } + KOKKOS_INLINE_FUNCTION + const view_type & ConstDownCast() const { return (const view_type & ) (*this); } + + //Types below - at least the HostMirror requires the value_type, NOT the rank 7 data_type of the traits + + /** \brief Compatible view of array of scalar types */ + typedef DynRankView< typename drvtraits::scalar_array_type , + typename drvtraits::array_layout , + typename drvtraits::device_type , + typename drvtraits::memory_traits > + array_type ; + + /** \brief Compatible view of const data type */ + typedef DynRankView< typename drvtraits::const_data_type , + typename drvtraits::array_layout , + typename drvtraits::device_type , + typename drvtraits::memory_traits > + const_type ; + + /** \brief Compatible view of non-const data type */ + typedef DynRankView< typename drvtraits::non_const_data_type , + typename 
drvtraits::array_layout , + typename drvtraits::device_type , + typename drvtraits::memory_traits > + non_const_type ; + + /** \brief Compatible HostMirror view */ + typedef DynRankView< typename drvtraits::non_const_data_type , + typename drvtraits::array_layout , + typename drvtraits::host_mirror_space > + HostMirror ; + + + //---------------------------------------- + // Domain rank and extents + +// enum { Rank = map_type::Rank }; //Will be dyn rank of 7 always, keep the enum? + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral::value , size_t >::type + extent( const iType & r ) const + { return m_map.extent(r); } + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral::value , int >::type + extent_int( const iType & r ) const + { return static_cast(m_map.extent(r)); } + + KOKKOS_INLINE_FUNCTION constexpr + typename traits::array_layout layout() const + { return m_map.layout(); } + + //---------------------------------------- + /* Deprecate all 'dimension' functions in favor of + * ISO/C++ vocabulary 'extent'. 
+ */ + + template< typename iType > + KOKKOS_INLINE_FUNCTION constexpr + typename std::enable_if< std::is_integral::value , size_t >::type + dimension( const iType & r ) const { return extent( r ); } + + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_0() const { return m_map.dimension_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_1() const { return m_map.dimension_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_2() const { return m_map.dimension_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_3() const { return m_map.dimension_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_4() const { return m_map.dimension_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_5() const { return m_map.dimension_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_6() const { return m_map.dimension_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t dimension_7() const { return m_map.dimension_7(); } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION constexpr size_t size() const { return m_map.dimension_0() * + m_map.dimension_1() * + m_map.dimension_2() * + m_map.dimension_3() * + m_map.dimension_4() * + m_map.dimension_5() * + m_map.dimension_6() * + m_map.dimension_7(); } + + KOKKOS_INLINE_FUNCTION constexpr size_t stride_0() const { return m_map.stride_0(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_1() const { return m_map.stride_1(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_2() const { return m_map.stride_2(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_3() const { return m_map.stride_3(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_4() const { return m_map.stride_4(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_5() const { return m_map.stride_5(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_6() const { return m_map.stride_6(); } + KOKKOS_INLINE_FUNCTION constexpr size_t stride_7() const { return m_map.stride_7(); } + + template< typename iType > + 
KOKKOS_INLINE_FUNCTION void stride( iType * const s ) const { m_map.stride(s); } + + //---------------------------------------- + // Range span is the span which contains all members. + + typedef typename map_type::reference_type reference_type ; + typedef typename map_type::pointer_type pointer_type ; + + enum { reference_type_is_lvalue_reference = std::is_lvalue_reference< reference_type >::value }; + + KOKKOS_INLINE_FUNCTION constexpr size_t span() const { return m_map.span(); } + // Deprecated, use 'span()' instead + KOKKOS_INLINE_FUNCTION constexpr size_t capacity() const { return m_map.span(); } + KOKKOS_INLINE_FUNCTION constexpr bool span_is_contiguous() const { return m_map.span_is_contiguous(); } + KOKKOS_INLINE_FUNCTION constexpr pointer_type data() const { return m_map.data(); } + + // Deprecated, use 'span_is_contiguous()' instead + KOKKOS_INLINE_FUNCTION constexpr bool is_contiguous() const { return m_map.span_is_contiguous(); } + // Deprecated, use 'data()' instead + KOKKOS_INLINE_FUNCTION constexpr pointer_type ptr_on_device() const { return m_map.data(); } + + //---------------------------------------- + // Allow specializations to query their specialized map + + KOKKOS_INLINE_FUNCTION + const Kokkos::Experimental::Impl::ViewMapping< traits , void > & + implementation_map() const { return m_map ; } + + //---------------------------------------- + +private: + + enum { + is_layout_left = std::is_same< typename traits::array_layout + , Kokkos::LayoutLeft >::value , + + is_layout_right = std::is_same< typename traits::array_layout + , Kokkos::LayoutRight >::value , + + is_layout_stride = std::is_same< typename traits::array_layout + , Kokkos::LayoutStride >::value , + + is_default_map = + std::is_same< typename traits::specialize , void >::value && + ( is_layout_left || is_layout_right || is_layout_stride ) + }; + +// Bounds checking macros +#if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK ) + +#define KOKKOS_VIEW_OPERATOR_VERIFY( N , ARG ) \ + 
Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \ + < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); \ + Kokkos::Experimental::Impl::verify_dynrankview_rank ( N , *this ) ; \ + Kokkos::Experimental::Impl::view_verify_operator_bounds ARG ; + +#else + +#define KOKKOS_VIEW_OPERATOR_VERIFY( N , ARG ) \ + Kokkos::Impl::VerifyExecutionCanAccessMemorySpace \ + < Kokkos::Impl::ActiveExecutionMemorySpace , typename traits::memory_space >::verify(); + +#endif + +public: + + KOKKOS_INLINE_FUNCTION + constexpr unsigned rank() const { return m_rank; } + + + //operators () + // Rank 0 + KOKKOS_INLINE_FUNCTION + reference_type operator()() const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 0 , ( implementation_map() ) ) + return implementation_map().reference(); + //return m_map.reference(0,0,0,0,0,0,0); + } + + // Rank 1 + // This assumes a contiguous underlying memory (i.e. no padding, no striding...) + template< typename iType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< std::is_same::value && std::is_integral::value, reference_type>::type + operator[](const iType & i0) const + { + return data()[i0]; + } + + // This assumes a contiguous underlying memory (i.e. no padding, no striding... 
+ // AND a Trilinos/Sacado scalar type ) + template< typename iType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !std::is_same::value && std::is_integral::value, reference_type>::type + operator[](const iType & i0) const + { +// auto map = implementation_map(); + const size_t dim_scalar = m_map.dimension_scalar(); + const size_t bytes = this->span() / dim_scalar; + + typedef Kokkos::View > tmp_view_type; + tmp_view_type rankone_view(this->data(), bytes, dim_scalar); + return rankone_view(i0); + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType & i0 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 1 , ( m_map , i0 ) ) + return m_map.reference(i0); + } + + template< typename iType > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType & i0 ) const + { + return m_map.reference(i0,0,0,0,0,0,0); + } + + // Rank 2 + template< typename iType0 , typename iType1 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 2 , ( m_map , i0 , i1 ) ) + return m_map.reference(i0,i1); + } + + template< typename iType0 , typename iType1 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 2 , ( m_map , i0 , i1 ) ) + return m_map.reference(i0,i1,0,0,0,0,0); + } + + // Rank 3 + template< typename iType0 , typename iType1 , typename iType2 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), 
reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 3 , ( m_map , i0 , i1 , i2 ) ) + return m_map.reference(i0,i1,i2); + } + + template< typename iType0 , typename iType1 , typename iType2 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 3 , ( m_map , i0 , i1 , i2 ) ) + return m_map.reference(i0,i1,i2,0,0,0,0); + } + + // Rank 4 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 4 , ( m_map , i0 , i1 , i2 , i3 ) ) + return m_map.reference(i0,i1,i2,i3); + } + + template< typename iType0 , typename iType1 , typename iType2 , typename iType3 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 4 , ( m_map , i0 , i1 , i2 , i3 ) ) + return m_map.reference(i0,i1,i2,i3,0,0,0); + } + + // Rank 5 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) 
const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 5 , ( m_map , i0 , i1 , i2 , i3 , i4 ) ) + return m_map.reference(i0,i1,i2,i3,i4); + } + + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 5 , ( m_map , i0 , i1 , i2 , i3 , i4 ) ) + return m_map.reference(i0,i1,i2,i3,i4,0,0); + } + + // Rank 6 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< (std::is_same::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 6 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 ) ) + return m_map.reference(i0,i1,i2,i3,i4,i5); + } + + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< !(std::is_same::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 6 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 ) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,0); + } + + // Rank 7 + template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 , typename iType6 > + KOKKOS_INLINE_FUNCTION + typename std::enable_if< 
(std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value && std::is_integral::value), reference_type>::type + operator()(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const + { + KOKKOS_VIEW_OPERATOR_VERIFY( 7 , ( m_map , i0 , i1 , i2 , i3 , i4 , i5 , i6 ) ) + return m_map.reference(i0,i1,i2,i3,i4,i5,i6); + } + +#undef KOKKOS_VIEW_OPERATOR_VERIFY + + //---------------------------------------- + // Standard constructor, destructor, and assignment operators... + + KOKKOS_INLINE_FUNCTION + ~DynRankView() {} + + KOKKOS_INLINE_FUNCTION + DynRankView() : m_track(), m_map(), m_rank() {} //Default ctor + + KOKKOS_INLINE_FUNCTION + DynRankView( const DynRankView & rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ), m_rank(rhs.m_rank) {} + + KOKKOS_INLINE_FUNCTION + DynRankView( DynRankView && rhs ) : m_track( rhs.m_track ), m_map( rhs.m_map ), m_rank(rhs.m_rank) {} + + KOKKOS_INLINE_FUNCTION + DynRankView & operator = ( const DynRankView & rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } + + KOKKOS_INLINE_FUNCTION + DynRankView & operator = ( DynRankView && rhs ) { m_track = rhs.m_track; m_map = rhs.m_map; m_rank = rhs.m_rank; return *this; } + + //---------------------------------------- + // Compatible view copy constructor and assignment + // may assign unmanaged from managed. + template< class RT , class ... 
RP > + KOKKOS_INLINE_FUNCTION + DynRankView( const DynRankView & rhs ) + : m_track( rhs.m_track , traits::is_managed ) + , m_map() + , m_rank(rhs.m_rank) + { + typedef typename DynRankView ::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); + Mapping::assign( m_map , rhs.m_map , rhs.m_track ); + } + + template< class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + DynRankView & operator = (const DynRankView & rhs ) + { + typedef typename DynRankView ::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); + Mapping::assign( m_map , rhs.m_map , rhs.m_track ); + m_track.assign( rhs.m_track , traits::is_managed ); + m_rank = rhs.rank(); + return *this; + } + +// Experimental +// Copy/Assign View to DynRankView + template< class RT , class ... RP > + KOKKOS_INLINE_FUNCTION + DynRankView( const View & rhs ) + : m_track() + , m_map() + , m_rank( rhs.Rank ) + { + typedef typename View::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" ); + Mapping::assign( *this , rhs ); + } + + template< class RT , class ... 
RP > + KOKKOS_INLINE_FUNCTION + DynRankView & operator = ( const View & rhs ) + { + typedef typename View::traits SrcTraits ; + typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag > Mapping ; + static_assert( Mapping::is_assignable , "Incompatible View to DynRankView copy assignment" ); + Mapping::assign( *this , rhs ); + return *this ; + } + + //---------------------------------------- + // Allocation tracking properties + + KOKKOS_INLINE_FUNCTION + int use_count() const + { return m_track.use_count(); } + + inline + const std::string label() const + { return m_track.template get_label< typename traits::memory_space >(); } + + //---------------------------------------- + // Allocation according to allocation properties and array layout + // unused arg_layout dimensions must be set to ~size_t(0) so that rank deduction can properly take place + template< class ... P > + explicit inline + DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< ! Impl::ViewCtorProp< P... >::has_pointer + , typename traits::array_layout + >::type const & arg_layout + ) + : m_track() + , m_map() + , m_rank( Impl::DynRankDimTraits::computeRank(arg_layout) ) + { + // Append layout and spaces if not input + typedef Impl::ViewCtorProp< P ... > alloc_prop_input ; + + // use 'std::integral_constant' for non-types + // to avoid duplicate class error. + typedef Impl::ViewCtorProp + < P ... 
+ , typename std::conditional + < alloc_prop_input::has_label + , std::integral_constant + , typename std::string + >::type + , typename std::conditional + < alloc_prop_input::has_memory_space + , std::integral_constant + , typename traits::device_type::memory_space + >::type + , typename std::conditional + < alloc_prop_input::has_execution_space + , std::integral_constant + , typename traits::device_type::execution_space + >::type + > alloc_prop ; + + static_assert( traits::is_managed + , "View allocation constructor requires managed memory" ); + + if ( alloc_prop::initialize && + ! alloc_prop::execution_space::is_initialized() ) { + // If initializing view data then + // the execution space must be initialized. + Kokkos::Impl::throw_runtime_exception("Constructing DynRankView and initializing data with uninitialized execution space"); + } + + // Copy the input allocation properties with possibly defaulted properties + alloc_prop prop( arg_prop ); + +//------------------------------------------------------------ +#if defined( KOKKOS_HAVE_CUDA ) + // If allocating in CudaUVMSpace must fence before and after + // the allocation to protect against possible concurrent access + // on the CPU and the GPU. + // Fence using the trait's executon space (which will be Kokkos::Cuda) + // to avoid incomplete type errors from usng Kokkos::Cuda directly. 
+ if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { + traits::device_type::memory_space::execution_space::fence(); + } +#endif +//------------------------------------------------------------ + + Kokkos::Experimental::Impl::SharedAllocationRecord<> * + record = m_map.allocate_shared( prop , Impl::DynRankDimTraits::createLayout(arg_layout) ); + +//------------------------------------------------------------ +#if defined( KOKKOS_HAVE_CUDA ) + if ( std::is_same< Kokkos::CudaUVMSpace , typename traits::device_type::memory_space >::value ) { + traits::device_type::memory_space::execution_space::fence(); + } +#endif +//------------------------------------------------------------ + + // Setup and initialization complete, start tracking + m_track.assign_allocated_record_to_uninitialized( record ); + } + + + // Wrappers + template< class ... P > + explicit KOKKOS_INLINE_FUNCTION + DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer + , typename traits::array_layout + >::type const & arg_layout + ) + : m_track() // No memory tracking + , m_map( arg_prop , Impl::DynRankDimTraits::createLayout(arg_layout) ) + , m_rank( Impl::DynRankDimTraits::computeRank(arg_layout) ) + { + static_assert( + std::is_same< pointer_type + , typename Impl::ViewCtorProp< P... >::pointer_type + >::value , + "Constructing DynRankView to wrap user memory must supply matching pointer type" ); + } + + //---------------------------------------- + //Constructor(s) + + // Simple dimension-only layout + template< class ... P > + explicit inline + DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< ! Impl::ViewCtorProp< P... 
>::has_pointer + , size_t + >::type const arg_N0 = ~size_t(0) + , const size_t arg_N1 = ~size_t(0) + , const size_t arg_N2 = ~size_t(0) + , const size_t arg_N3 = ~size_t(0) + , const size_t arg_N4 = ~size_t(0) + , const size_t arg_N5 = ~size_t(0) + , const size_t arg_N6 = ~size_t(0) + , const size_t arg_N7 = ~size_t(0) + ) + : DynRankView( arg_prop + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} + + template< class ... P > + explicit KOKKOS_INLINE_FUNCTION + DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop + , typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer + , size_t + >::type const arg_N0 = ~size_t(0) + , const size_t arg_N1 = ~size_t(0) + , const size_t arg_N2 = ~size_t(0) + , const size_t arg_N3 = ~size_t(0) + , const size_t arg_N4 = ~size_t(0) + , const size_t arg_N5 = ~size_t(0) + , const size_t arg_N6 = ~size_t(0) + , const size_t arg_N7 = ~size_t(0) + ) + : DynRankView( arg_prop + , typename traits::array_layout + ( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 ) + ) + {} + + // Allocate with label and layout + template< typename Label > + explicit inline + DynRankView( const Label & arg_label + , typename std::enable_if< + Kokkos::Experimental::Impl::is_view_label