forked from OSchip/llvm-project
[pstl] Initial implementation of OpenMP backend, on behalf of Christopher Nelson nadiasvertex@gmail.com
Phabricator Review: https://reviews.llvm.org/D99836 A couple of parallel patterns still remains serial - "Parallel partial sort", and "Parallel transform scan" - there are //TODOs in the code.
This commit is contained in:
parent
e7418906f5
commit
6069a6a504
|
@ -31,7 +31,7 @@ function(define_linker_script target)
|
|||
set(link_libraries)
|
||||
if (interface_libs)
|
||||
foreach(lib IN LISTS interface_libs)
|
||||
if ("${lib}" STREQUAL "cxx-headers")
|
||||
if ("${lib}" MATCHES "cxx-headers|ParallelSTL")
|
||||
continue()
|
||||
endif()
|
||||
# If ${lib} is not a target, we use a dummy target which we know will
|
||||
|
|
|
@ -16,7 +16,7 @@ math(EXPR VERSION_PATCH "(${PARALLELSTL_VERSION_SOURCE} % 10)")
|
|||
|
||||
project(ParallelSTL VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_PATCH} LANGUAGES CXX)
|
||||
|
||||
set(PSTL_PARALLEL_BACKEND "serial" CACHE STRING "Threading backend to use. Valid choices are 'serial' and 'tbb'. The default is 'serial'.")
|
||||
set(PSTL_PARALLEL_BACKEND "serial" CACHE STRING "Threading backend to use. Valid choices are 'serial', 'omp', and 'tbb'. The default is 'serial'.")
|
||||
set(PSTL_HIDE_FROM_ABI_PER_TU OFF CACHE BOOL "Whether to constrain ABI-unstable symbols to each translation unit (basically, mark them with C's static keyword).")
|
||||
set(_PSTL_HIDE_FROM_ABI_PER_TU ${PSTL_HIDE_FROM_ABI_PER_TU}) # For __pstl_config_site
|
||||
|
||||
|
@ -43,6 +43,10 @@ elseif (PSTL_PARALLEL_BACKEND STREQUAL "tbb")
|
|||
message(STATUS "Parallel STL uses TBB ${TBB_VERSION} (interface version: ${TBB_INTERFACE_VERSION})")
|
||||
target_link_libraries(ParallelSTL INTERFACE TBB::tbb)
|
||||
set(_PSTL_PAR_BACKEND_TBB ON)
|
||||
elseif (PSTL_PARALLEL_BACKEND STREQUAL "omp")
|
||||
message(STATUS "Parallel STL uses the omp backend")
|
||||
target_compile_options(ParallelSTL INTERFACE "-fopenmp=libomp")
|
||||
set(_PSTL_PAR_BACKEND_OPENMP ON)
|
||||
else()
|
||||
message(FATAL_ERROR "Requested unknown Parallel STL backend '${PSTL_PARALLEL_BACKEND}'.")
|
||||
endif()
|
||||
|
|
|
@ -15,3 +15,7 @@ D: Created the initial implementation.
|
|||
N: Thomas Rodgers
|
||||
E: trodgers@redhat.com
|
||||
D: Identifier name transformation for inclusion in a Standard C++ library.
|
||||
|
||||
N: Christopher Nelson
|
||||
E: nadiasvertex@gmail.com
|
||||
D: Add support for an OpenMP backend.
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
|
||||
#cmakedefine _PSTL_PAR_BACKEND_SERIAL
|
||||
#cmakedefine _PSTL_PAR_BACKEND_TBB
|
||||
#cmakedefine _PSTL_PAR_BACKEND_OPENMP
|
||||
#cmakedefine _PSTL_HIDE_FROM_ABI_PER_TU
|
||||
|
||||
#endif // __PSTL_CONFIG_SITE
|
||||
|
|
|
@ -0,0 +1,64 @@
|
|||
// -*- C++ -*-
|
||||
// -*-===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_FOR_H
|
||||
#define _PSTL_INTERNAL_OMP_PARALLEL_FOR_H
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "util.h"
|
||||
|
||||
namespace __pstl
|
||||
{
|
||||
namespace __omp_backend
|
||||
{
|
||||
|
||||
template <class _Index, class _Fp>
|
||||
void
|
||||
__parallel_for_body(_Index __first, _Index __last, _Fp __f)
|
||||
{
|
||||
// initial partition of the iteration space into chunks
|
||||
auto __policy = __omp_backend::__chunk_partitioner(__first, __last);
|
||||
|
||||
// To avoid over-subscription we use taskloop for the nested parallelism
|
||||
_PSTL_PRAGMA(omp taskloop untied mergeable)
|
||||
for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk)
|
||||
{
|
||||
__omp_backend::__process_chunk(__policy, __first, __chunk, __f);
|
||||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// Notation:
|
||||
// Evaluation of brick f[i,j) for each subrange [i,j) of [first, last)
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
template <class _ExecutionPolicy, class _Index, class _Fp>
|
||||
void
|
||||
__parallel_for(_ExecutionPolicy&&, _Index __first, _Index __last, _Fp __f)
|
||||
{
|
||||
if (omp_in_parallel())
|
||||
{
|
||||
// we don't create a nested parallel region in an existing parallel
|
||||
// region: just create tasks
|
||||
__pstl::__omp_backend::__parallel_for_body(__first, __last, __f);
|
||||
}
|
||||
else
|
||||
{
|
||||
// in any case (nested or non-nested) one parallel region is created and
|
||||
// only one thread creates a set of tasks
|
||||
_PSTL_PRAGMA(omp parallel)
|
||||
_PSTL_PRAGMA(omp single nowait) { __pstl::__omp_backend::__parallel_for_body(__first, __last, __f); }
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace __omp_backend
|
||||
} // namespace __pstl
|
||||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_FOR_H
|
|
@ -0,0 +1,59 @@
|
|||
// -*- C++ -*-
|
||||
// -*-===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H
|
||||
#define _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H
|
||||
|
||||
#include "util.h"
|
||||
|
||||
namespace __pstl
|
||||
{
|
||||
namespace __omp_backend
|
||||
{
|
||||
|
||||
template <class _ForwardIterator, class _Fp>
|
||||
void
|
||||
__parallel_for_each_body(_ForwardIterator __first, _ForwardIterator __last, _Fp __f)
|
||||
{
|
||||
using DifferenceType = typename std::iterator_traits<_ForwardIterator>::difference_type;
|
||||
// TODO: Think of an approach to remove the std::distance call
|
||||
auto __size = std::distance(__first, __last);
|
||||
|
||||
_PSTL_PRAGMA(omp taskloop untied mergeable)
|
||||
for (DifferenceType __index = 0; __index < __size; ++__index)
|
||||
{
|
||||
// TODO: Think of an approach to remove the increment here each time.
|
||||
auto __iter = std::next(__first, __index);
|
||||
__f(*__iter);
|
||||
}
|
||||
}
|
||||
|
||||
template <class _ExecutionPolicy, class _ForwardIterator, class _Fp>
|
||||
void
|
||||
__parallel_for_each(_ExecutionPolicy&&, _ForwardIterator __first, _ForwardIterator __last, _Fp __f)
|
||||
{
|
||||
if (omp_in_parallel())
|
||||
{
|
||||
// we don't create a nested parallel region in an existing parallel
|
||||
// region: just create tasks
|
||||
__pstl::__omp_backend::__parallel_for_each_body(__first, __last, __f);
|
||||
}
|
||||
else
|
||||
{
|
||||
// in any case (nested or non-nested) one parallel region is created and
|
||||
// only one thread creates a set of tasks
|
||||
_PSTL_PRAGMA(omp parallel)
|
||||
_PSTL_PRAGMA(omp single nowait) { __pstl::__omp_backend::__parallel_for_each_body(__first, __last, __f); }
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace __omp_backend
|
||||
} // namespace __pstl
|
||||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_FOR_EACH_H
|
|
@ -0,0 +1,50 @@
|
|||
// -*- C++ -*-
|
||||
// -*-===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H
|
||||
#define _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H
|
||||
|
||||
#include "util.h"
|
||||
|
||||
namespace __pstl
|
||||
{
|
||||
namespace __omp_backend
|
||||
{
|
||||
|
||||
template <typename _F1, typename _F2>
|
||||
void
|
||||
__parallel_invoke_body(_F1&& __f1, _F2&& __f2)
|
||||
{
|
||||
_PSTL_PRAGMA(omp taskgroup)
|
||||
{
|
||||
_PSTL_PRAGMA(omp task untied mergeable) { std::forward<_F1>(__f1)(); }
|
||||
_PSTL_PRAGMA(omp task untied mergeable) { std::forward<_F2>(__f2)(); }
|
||||
}
|
||||
}
|
||||
|
||||
template <class _ExecutionPolicy, typename _F1, typename _F2>
|
||||
void
|
||||
__parallel_invoke(_ExecutionPolicy&&, _F1&& __f1, _F2&& __f2)
|
||||
{
|
||||
if (omp_in_parallel())
|
||||
{
|
||||
__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2));
|
||||
}
|
||||
else
|
||||
{
|
||||
_PSTL_PRAGMA(omp parallel)
|
||||
_PSTL_PRAGMA(omp single nowait)
|
||||
__parallel_invoke_body(std::forward<_F1>(__f1), std::forward<_F2>(__f2));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace __omp_backend
|
||||
} // namespace __pstl
|
||||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_INVOKE_H
|
|
@ -0,0 +1,94 @@
|
|||
// -*- C++ -*-
|
||||
// -*-===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H
|
||||
#define _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H
|
||||
|
||||
#include "util.h"
|
||||
|
||||
namespace __pstl
|
||||
{
|
||||
namespace __omp_backend
|
||||
{
|
||||
|
||||
template <typename _RandomAccessIterator1, typename _RandomAccessIterator2, typename _RandomAccessIterator3,
|
||||
typename _Compare, typename _LeafMerge>
|
||||
void
|
||||
__parallel_merge_body(std::size_t __size_x, std::size_t __size_y, _RandomAccessIterator1 __xs,
|
||||
_RandomAccessIterator1 __xe, _RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye,
|
||||
_RandomAccessIterator3 __zs, _Compare __comp, _LeafMerge __leaf_merge)
|
||||
{
|
||||
|
||||
if (__size_x + __size_y <= __omp_backend::__default_chunk_size)
|
||||
{
|
||||
__leaf_merge(__xs, __xe, __ys, __ye, __zs, __comp);
|
||||
return;
|
||||
}
|
||||
|
||||
_RandomAccessIterator1 __xm;
|
||||
_RandomAccessIterator2 __ym;
|
||||
|
||||
if (__size_x < __size_y)
|
||||
{
|
||||
__ym = __ys + (__size_y / 2);
|
||||
__xm = std::upper_bound(__xs, __xe, *__ym, __comp);
|
||||
}
|
||||
else
|
||||
{
|
||||
__xm = __xs + (__size_x / 2);
|
||||
__ym = std::lower_bound(__ys, __ye, *__xm, __comp);
|
||||
}
|
||||
|
||||
auto __zm = __zs + (__xm - __xs) + (__ym - __ys);
|
||||
|
||||
_PSTL_PRAGMA(omp task untied mergeable default(none)
|
||||
firstprivate(__xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge))
|
||||
__parallel_merge_body(__xm - __xs, __ym - __ys, __xs, __xm, __ys, __ym, __zs, __comp, __leaf_merge);
|
||||
|
||||
_PSTL_PRAGMA(omp task untied mergeable default(none)
|
||||
firstprivate(__xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge))
|
||||
__parallel_merge_body(__xe - __xm, __ye - __ym, __xm, __xe, __ym, __ye, __zm, __comp, __leaf_merge);
|
||||
|
||||
_PSTL_PRAGMA(omp taskwait)
|
||||
}
|
||||
|
||||
template <class _ExecutionPolicy, typename _RandomAccessIterator1, typename _RandomAccessIterator2,
|
||||
typename _RandomAccessIterator3, typename _Compare, typename _LeafMerge>
|
||||
void
|
||||
__parallel_merge(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator1 __xs, _RandomAccessIterator1 __xe,
|
||||
_RandomAccessIterator2 __ys, _RandomAccessIterator2 __ye, _RandomAccessIterator3 __zs, _Compare __comp,
|
||||
_LeafMerge __leaf_merge)
|
||||
|
||||
{
|
||||
std::size_t __size_x = __xe - __xs;
|
||||
std::size_t __size_y = __ye - __ys;
|
||||
|
||||
/*
|
||||
* Run the merge in parallel by chunking it up. Use the smaller range (if any) as the iteration range, and the
|
||||
* larger range as the search range.
|
||||
*/
|
||||
|
||||
if (omp_in_parallel())
|
||||
{
|
||||
__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge);
|
||||
}
|
||||
else
|
||||
{
|
||||
_PSTL_PRAGMA(omp parallel)
|
||||
{
|
||||
_PSTL_PRAGMA(omp single nowait)
|
||||
__parallel_merge_body(__size_x, __size_y, __xs, __xe, __ys, __ye, __zs, __comp, __leaf_merge);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace __omp_backend
|
||||
} // namespace __pstl
|
||||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_MERGE_H
|
|
@ -0,0 +1,68 @@
|
|||
// -*- C++ -*-
|
||||
// -*-===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H
|
||||
#define _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H
|
||||
|
||||
#include "util.h"
|
||||
|
||||
namespace __pstl
|
||||
{
|
||||
namespace __omp_backend
|
||||
{
|
||||
|
||||
template <class _RandomAccessIterator, class _Value, typename _RealBody, typename _Reduction>
|
||||
_Value
|
||||
__parallel_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity,
|
||||
_RealBody __real_body, _Reduction __reduce)
|
||||
{
|
||||
auto __middle = __first + ((__last - __first) / 2);
|
||||
_Value __v1(__identity), __v2(__identity);
|
||||
__parallel_invoke_body(
|
||||
[&]() { __v1 = __parallel_reduce_body(__first, __middle, __identity, __real_body, __reduce); },
|
||||
[&]() { __v2 = __parallel_reduce_body(__middle, __last, __identity, __real_body, __reduce); });
|
||||
|
||||
return __reduce(__v1, __v2);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// Notation:
|
||||
// r(i,j,init) returns reduction of init with reduction over [i,j)
|
||||
// c(x,y) combines values x and y that were the result of r
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
template <class _ExecutionPolicy, class _RandomAccessIterator, class _Value, typename _RealBody, typename _Reduction>
|
||||
_Value
|
||||
__parallel_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last, _Value __identity,
|
||||
_RealBody __real_body, _Reduction __reduction)
|
||||
{
|
||||
// We don't create a nested parallel region in an existing parallel region:
|
||||
// just create tasks.
|
||||
if (omp_in_parallel())
|
||||
{
|
||||
return __pstl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction);
|
||||
}
|
||||
|
||||
// In any case (nested or non-nested) one parallel region is created and only
|
||||
// one thread creates a set of tasks.
|
||||
_Value __res = __identity;
|
||||
|
||||
_PSTL_PRAGMA(omp parallel)
|
||||
_PSTL_PRAGMA(omp single nowait)
|
||||
{
|
||||
__res = __pstl::__omp_backend::__parallel_reduce_body(__first, __last, __identity, __real_body, __reduction);
|
||||
}
|
||||
|
||||
return __res;
|
||||
}
|
||||
|
||||
} // namespace __omp_backend
|
||||
} // namespace __pstl
|
||||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_REDUCE_H
|
|
@ -0,0 +1,136 @@
|
|||
// -*- C++ -*-
|
||||
// -*-===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_SCAN_H
|
||||
#define _PSTL_INTERNAL_OMP_PARALLEL_SCAN_H
|
||||
|
||||
#include "parallel_invoke.h"
|
||||
|
||||
namespace __pstl
|
||||
{
|
||||
namespace __omp_backend
|
||||
{
|
||||
|
||||
template <typename _Index>
|
||||
_Index
|
||||
__split(_Index __m)
|
||||
{
|
||||
_Index __k = 1;
|
||||
while (2 * __k < __m)
|
||||
__k *= 2;
|
||||
return __k;
|
||||
}
|
||||
|
||||
template <typename _Index, typename _Tp, typename _Rp, typename _Cp>
|
||||
void
|
||||
__upsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Rp __reduce, _Cp __combine)
|
||||
{
|
||||
if (__m == 1)
|
||||
__r[0] = __reduce(__i * __tilesize, __lastsize);
|
||||
else
|
||||
{
|
||||
_Index __k = __split(__m);
|
||||
__omp_backend::__parallel_invoke_body(
|
||||
[=] { __omp_backend::__upsweep(__i, __k, __tilesize, __r, __tilesize, __reduce, __combine); },
|
||||
[=] {
|
||||
__omp_backend::__upsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize, __reduce, __combine);
|
||||
});
|
||||
if (__m == 2 * __k)
|
||||
__r[__m - 1] = __combine(__r[__k - 1], __r[__m - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename _Index, typename _Tp, typename _Cp, typename _Sp>
|
||||
void
|
||||
__downsweep(_Index __i, _Index __m, _Index __tilesize, _Tp* __r, _Index __lastsize, _Tp __initial, _Cp __combine,
|
||||
_Sp __scan)
|
||||
{
|
||||
if (__m == 1)
|
||||
__scan(__i * __tilesize, __lastsize, __initial);
|
||||
else
|
||||
{
|
||||
const _Index __k = __split(__m);
|
||||
__omp_backend::__parallel_invoke_body(
|
||||
[=] { __omp_backend::__downsweep(__i, __k, __tilesize, __r, __tilesize, __initial, __combine, __scan); },
|
||||
// Assumes that __combine never throws.
|
||||
// TODO: Consider adding a requirement for user functors to be constant.
|
||||
[=, &__combine]
|
||||
{
|
||||
__omp_backend::__downsweep(__i + __k, __m - __k, __tilesize, __r + __k, __lastsize,
|
||||
__combine(__initial, __r[__k - 1]), __combine, __scan);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
template <typename _ExecutionPolicy, typename _Index, typename _Tp, typename _Rp, typename _Cp, typename _Sp,
|
||||
typename _Ap>
|
||||
void
|
||||
__parallel_strict_scan_body(_Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan, _Ap __apex)
|
||||
{
|
||||
_Index __p = omp_get_num_threads();
|
||||
const _Index __slack = 4;
|
||||
_Index __tilesize = (__n - 1) / (__slack * __p) + 1;
|
||||
_Index __m = (__n - 1) / __tilesize;
|
||||
__buffer<_Tp> __buf(__m + 1);
|
||||
_Tp* __r = __buf.get();
|
||||
|
||||
__omp_backend::__upsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __reduce, __combine);
|
||||
|
||||
std::size_t __k = __m + 1;
|
||||
_Tp __t = __r[__k - 1];
|
||||
while ((__k &= __k - 1))
|
||||
{
|
||||
__t = __combine(__r[__k - 1], __t);
|
||||
}
|
||||
|
||||
__apex(__combine(__initial, __t));
|
||||
__omp_backend::__downsweep(_Index(0), _Index(__m + 1), __tilesize, __r, __n - __m * __tilesize, __initial,
|
||||
__combine, __scan);
|
||||
}
|
||||
|
||||
template <class _ExecutionPolicy, typename _Index, typename _Tp, typename _Rp, typename _Cp, typename _Sp, typename _Ap>
|
||||
void
|
||||
__parallel_strict_scan(_ExecutionPolicy&&, _Index __n, _Tp __initial, _Rp __reduce, _Cp __combine, _Sp __scan,
|
||||
_Ap __apex)
|
||||
{
|
||||
if (__n <= __default_chunk_size)
|
||||
{
|
||||
_Tp __sum = __initial;
|
||||
if (__n)
|
||||
{
|
||||
__sum = __combine(__sum, __reduce(_Index(0), __n));
|
||||
}
|
||||
__apex(__sum);
|
||||
if (__n)
|
||||
{
|
||||
__scan(_Index(0), __n, __initial);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (omp_in_parallel())
|
||||
{
|
||||
__pstl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, __combine,
|
||||
__scan, __apex);
|
||||
}
|
||||
else
|
||||
{
|
||||
_PSTL_PRAGMA(omp parallel)
|
||||
_PSTL_PRAGMA(omp single nowait)
|
||||
{
|
||||
__pstl::__omp_backend::__parallel_strict_scan_body<_ExecutionPolicy>(__n, __initial, __reduce, __combine,
|
||||
__scan, __apex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace __omp_backend
|
||||
} // namespace __pstl
|
||||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_SCAN_H
|
|
@ -0,0 +1,32 @@
|
|||
// -*- C++ -*-
|
||||
// -*-===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_STABLE_PARTIAL_SORT_H
|
||||
#define _PSTL_INTERNAL_OMP_PARALLEL_STABLE_PARTIAL_SORT_H
|
||||
|
||||
#include "util.h"
|
||||
|
||||
namespace __pstl
|
||||
{
|
||||
namespace __omp_backend
|
||||
{
|
||||
|
||||
template <typename _RandomAccessIterator, typename _Compare, typename _LeafSort>
|
||||
void
|
||||
__parallel_stable_partial_sort(_RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp,
|
||||
_LeafSort __leaf_sort, std::size_t /* __nsort */)
|
||||
{
|
||||
// TODO: "Parallel partial sort needs to be implemented.");
|
||||
__leaf_sort(__xs, __xe, __comp);
|
||||
}
|
||||
|
||||
} // namespace __omp_backend
|
||||
} // namespace __pstl
|
||||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_STABLE_PARTIAL_SORT_H
|
|
@ -0,0 +1,157 @@
|
|||
// -*- C++ -*-
|
||||
// -*-===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_STABLE_SORT_H
|
||||
#define _PSTL_INTERNAL_OMP_PARALLEL_STABLE_SORT_H
|
||||
|
||||
#include "util.h"
|
||||
|
||||
namespace __pstl
|
||||
{
|
||||
namespace __omp_backend
|
||||
{
|
||||
|
||||
namespace __sort_details
|
||||
{
|
||||
struct __move_value
|
||||
{
|
||||
template <typename _Iterator, typename _OutputIterator>
|
||||
void
|
||||
operator()(_Iterator __x, _OutputIterator __z) const
|
||||
{
|
||||
*__z = std::move(*__x);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename _RandomAccessIterator, typename _OutputIterator>
|
||||
_OutputIterator
|
||||
__parallel_move_range(_RandomAccessIterator __first1, _RandomAccessIterator __last1, _OutputIterator __d_first)
|
||||
{
|
||||
std::size_t __size = __last1 - __first1;
|
||||
|
||||
// Perform serial moving of small chunks
|
||||
|
||||
if (__size <= __default_chunk_size)
|
||||
{
|
||||
return std::move(__first1, __last1, __d_first);
|
||||
}
|
||||
|
||||
// Perform parallel moving of larger chunks
|
||||
auto __policy = __omp_backend::__chunk_partitioner(__first1, __last1);
|
||||
|
||||
_PSTL_PRAGMA(omp taskloop)
|
||||
for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk)
|
||||
{
|
||||
__omp_backend::__process_chunk(__policy, __first1, __chunk,
|
||||
[&](auto __chunk_first, auto __chunk_last)
|
||||
{
|
||||
auto __chunk_offset = __chunk_first - __first1;
|
||||
auto __output_it = __d_first + __chunk_offset;
|
||||
std::move(__chunk_first, __chunk_last, __output_it);
|
||||
});
|
||||
}
|
||||
|
||||
return __d_first + __size;
|
||||
}
|
||||
|
||||
struct __move_range
|
||||
{
|
||||
template <typename _RandomAccessIterator, typename _OutputIterator>
|
||||
_OutputIterator
|
||||
operator()(_RandomAccessIterator __first1, _RandomAccessIterator __last1, _OutputIterator __d_first) const
|
||||
{
|
||||
return __parallel_move_range(__first1, __last1, __d_first);
|
||||
}
|
||||
};
|
||||
} // namespace __sort_details
|
||||
|
||||
template <typename _RandomAccessIterator, typename _Compare, typename _LeafSort>
|
||||
void
|
||||
__parallel_stable_sort_body(_RandomAccessIterator __xs, _RandomAccessIterator __xe, _Compare __comp,
|
||||
_LeafSort __leaf_sort)
|
||||
{
|
||||
using _ValueType = typename std::iterator_traits<_RandomAccessIterator>::value_type;
|
||||
using _VecType = typename std::vector<_ValueType>;
|
||||
using _OutputIterator = typename _VecType::iterator;
|
||||
using _MoveValue = typename __omp_backend::__sort_details::__move_value;
|
||||
using _MoveRange = __omp_backend::__sort_details::__move_range;
|
||||
|
||||
if (__should_run_serial(__xs, __xe))
|
||||
{
|
||||
__leaf_sort(__xs, __xe, __comp);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::size_t __size = __xe - __xs;
|
||||
auto __mid = __xs + (__size / 2);
|
||||
__parallel_invoke_body([&]() { __parallel_stable_sort_body(__xs, __mid, __comp, __leaf_sort); },
|
||||
[&]() { __parallel_stable_sort_body(__mid, __xe, __comp, __leaf_sort); });
|
||||
|
||||
// Perform a parallel merge of the sorted ranges into __output_data.
|
||||
_VecType __output_data(__size);
|
||||
_MoveValue __move_value;
|
||||
_MoveRange __move_range;
|
||||
__utils::__serial_move_merge __merge(__size);
|
||||
__parallel_merge_body(
|
||||
__mid - __xs, __xe - __mid, __xs, __mid, __mid, __xe, __output_data.begin(), __comp,
|
||||
[&__merge, &__move_value, &__move_range](_RandomAccessIterator __as, _RandomAccessIterator __ae,
|
||||
_RandomAccessIterator __bs, _RandomAccessIterator __be,
|
||||
_OutputIterator __cs, _Compare __comp)
|
||||
{ __merge(__as, __ae, __bs, __be, __cs, __comp, __move_value, __move_value, __move_range, __move_range); });
|
||||
|
||||
// Move the values from __output_data back in the original source range.
|
||||
__omp_backend::__sort_details::__parallel_move_range(__output_data.begin(), __output_data.end(), __xs);
|
||||
}
|
||||
}
|
||||
|
||||
template <class _ExecutionPolicy, typename _RandomAccessIterator, typename _Compare, typename _LeafSort>
|
||||
void
|
||||
__parallel_stable_sort(_ExecutionPolicy&& /*__exec*/, _RandomAccessIterator __xs, _RandomAccessIterator __xe,
|
||||
_Compare __comp, _LeafSort __leaf_sort, std::size_t __nsort = 0)
|
||||
{
|
||||
auto __count = static_cast<std::size_t>(__xe - __xs);
|
||||
if (__count <= __default_chunk_size || __nsort < __count)
|
||||
{
|
||||
__leaf_sort(__xs, __xe, __comp);
|
||||
return;
|
||||
}
|
||||
|
||||
// TODO: the partial sort implementation should
|
||||
// be shared with the other backends.
|
||||
|
||||
if (omp_in_parallel())
|
||||
{
|
||||
if (__count <= __nsort)
|
||||
{
|
||||
__parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort);
|
||||
}
|
||||
else
|
||||
{
|
||||
__parallel_stable_partial_sort(__xs, __xe, __comp, __leaf_sort, __nsort);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
_PSTL_PRAGMA(omp parallel)
|
||||
_PSTL_PRAGMA(omp single nowait)
|
||||
if (__count <= __nsort)
|
||||
{
|
||||
__parallel_stable_sort_body(__xs, __xe, __comp, __leaf_sort);
|
||||
}
|
||||
else
|
||||
{
|
||||
__parallel_stable_partial_sort(__xs, __xe, __comp, __leaf_sort, __nsort);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace __omp_backend
|
||||
} // namespace __pstl
|
||||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_STABLE_SORT_H
|
|
@ -0,0 +1,112 @@
|
|||
// -*- C++ -*-
|
||||
// -*-===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H
|
||||
#define _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H
|
||||
|
||||
#include "util.h"
|
||||
|
||||
namespace __pstl
|
||||
{
|
||||
namespace __omp_backend
|
||||
{
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// parallel_transform_reduce
|
||||
//
|
||||
// Notation:
|
||||
// r(i,j,init) returns reduction of init with reduction over [i,j)
|
||||
// u(i) returns f(i,i+1,identity) for a hypothetical left identity element
|
||||
// of r c(x,y) combines values x and y that were the result of r or u
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
template <class _RandomAccessIterator, class _UnaryOp, class _Value, class _Combiner, class _Reduction>
|
||||
auto
|
||||
__transform_reduce_body(_RandomAccessIterator __first, _RandomAccessIterator __last, _UnaryOp __unary_op, _Value __init,
|
||||
_Combiner __combiner, _Reduction __reduction)
|
||||
{
|
||||
const std::size_t __num_threads = omp_get_num_threads();
|
||||
const std::size_t __size = __last - __first;
|
||||
|
||||
// Initial partition of the iteration space into chunks. If the range is too small,
|
||||
// this will result in a nonsense policy, so we check on the size as well below.
|
||||
auto __policy = __omp_backend::__chunk_partitioner(__first + __num_threads, __last);
|
||||
|
||||
if (__size <= __num_threads || __policy.__n_chunks < 2)
|
||||
{
|
||||
return __reduction(__first, __last, __init);
|
||||
}
|
||||
|
||||
// Here, we cannot use OpenMP UDR because we must store the init value in
|
||||
// the combiner and it will be used several times. Although there should be
|
||||
// the only one; we manually generate the identity elements for each thread.
|
||||
std::vector<_Value> __accums;
|
||||
__accums.reserve(__num_threads);
|
||||
|
||||
// initialize accumulators for all threads
|
||||
for (std::size_t __i = 0; __i < __num_threads; ++__i)
|
||||
{
|
||||
__accums.emplace_back(__unary_op(__first + __i));
|
||||
}
|
||||
|
||||
// main loop
|
||||
_PSTL_PRAGMA(omp taskloop shared(__accums))
|
||||
for (std::size_t __chunk = 0; __chunk < __policy.__n_chunks; ++__chunk)
|
||||
{
|
||||
__omp_backend::__process_chunk(__policy, __first + __num_threads, __chunk,
|
||||
[&](auto __chunk_first, auto __chunk_last)
|
||||
{
|
||||
auto __thread_num = omp_get_thread_num();
|
||||
__accums[__thread_num] =
|
||||
__reduction(__chunk_first, __chunk_last, __accums[__thread_num]);
|
||||
});
|
||||
}
|
||||
|
||||
// combine by accumulators
|
||||
for (std::size_t __i = 0; __i < __num_threads; ++__i)
|
||||
{
|
||||
__init = __combiner(__init, __accums[__i]);
|
||||
}
|
||||
|
||||
return __init;
|
||||
}
|
||||
|
||||
template <class _ExecutionPolicy, class _RandomAccessIterator, class _UnaryOp, class _Value, class _Combiner,
|
||||
class _Reduction>
|
||||
_Value
|
||||
__parallel_transform_reduce(_ExecutionPolicy&&, _RandomAccessIterator __first, _RandomAccessIterator __last,
|
||||
_UnaryOp __unary_op, _Value __init, _Combiner __combiner, _Reduction __reduction)
|
||||
{
|
||||
_Value __result = __init;
|
||||
if (omp_in_parallel())
|
||||
{
|
||||
// We don't create a nested parallel region in an existing parallel
|
||||
// region: just create tasks
|
||||
__result = __pstl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, __combiner,
|
||||
__reduction);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Create a parallel region, and a single thread will create tasks
|
||||
// for the region.
|
||||
_PSTL_PRAGMA(omp parallel)
|
||||
_PSTL_PRAGMA(omp single nowait)
|
||||
{
|
||||
__result = __pstl::__omp_backend::__transform_reduce_body(__first, __last, __unary_op, __init, __combiner,
|
||||
__reduction);
|
||||
}
|
||||
}
|
||||
|
||||
return __result;
|
||||
}
|
||||
|
||||
} // namespace __omp_backend
|
||||
} // namespace __pstl
|
||||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_REDUCE_H
|
|
@ -0,0 +1,32 @@
|
|||
// -*- C++ -*-
|
||||
// -*-===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_SCAN_H
|
||||
#define _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_SCAN_H
|
||||
|
||||
#include "util.h"
|
||||
|
||||
namespace __pstl
|
||||
{
|
||||
namespace __omp_backend
|
||||
{
|
||||
|
||||
template <class _ExecutionPolicy, class _Index, class _Up, class _Tp, class _Cp, class _Rp, class _Sp>
|
||||
_Tp
|
||||
__parallel_transform_scan(_ExecutionPolicy&&, _Index __n, _Up /* __u */, _Tp __init, _Cp /* __combine */,
|
||||
_Rp /* __brick_reduce */, _Sp __scan)
|
||||
{
|
||||
// TODO: parallelize this function.
|
||||
return __scan(_Index(0), __n, __init);
|
||||
}
|
||||
|
||||
} // namespace __omp_backend
|
||||
} // namespace __pstl
|
||||
#endif // _PSTL_INTERNAL_OMP_PARALLEL_TRANSFORM_SCAN_H
|
|
@ -0,0 +1,173 @@
|
|||
// -*- C++ -*-
|
||||
// -*-===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _PSTL_INTERNAL_OMP_UTIL_H
|
||||
#define _PSTL_INTERNAL_OMP_UTIL_H
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <iterator>
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <omp.h>
|
||||
|
||||
#include "../parallel_backend_utils.h"
|
||||
#include "../unseq_backend_simd.h"
|
||||
#include "../utils.h"
|
||||
|
||||
// Portability "#pragma" definition
|
||||
#ifdef _MSC_VER
|
||||
# define _PSTL_PRAGMA(x) __pragma(x)
|
||||
#else
|
||||
# define _PSTL_PRAGMA(x) _Pragma(# x)
|
||||
#endif
|
||||
|
||||
_PSTL_HIDE_FROM_ABI_PUSH
|
||||
|
||||
namespace __pstl
|
||||
{
|
||||
namespace __omp_backend
|
||||
{
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// use to cancel execution
|
||||
//------------------------------------------------------------------------
|
||||
inline void
|
||||
__cancel_execution()
|
||||
{
|
||||
// TODO: Figure out how to make cancelation work.
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// raw buffer
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
template <typename _Tp>
|
||||
class __buffer
|
||||
{
|
||||
std::allocator<_Tp> __allocator_;
|
||||
_Tp* __ptr_;
|
||||
const std::size_t __buf_size_;
|
||||
__buffer(const __buffer&) = delete;
|
||||
void
|
||||
operator=(const __buffer&) = delete;
|
||||
|
||||
public:
|
||||
__buffer(std::size_t __n) : __allocator_(), __ptr_(__allocator_.allocate(__n)), __buf_size_(__n) {}
|
||||
|
||||
operator bool() const { return __ptr_ != nullptr; }
|
||||
|
||||
_Tp*
|
||||
get() const
|
||||
{
|
||||
return __ptr_;
|
||||
}
|
||||
~__buffer() { __allocator_.deallocate(__ptr_, __buf_size_); }
|
||||
};
|
||||
|
||||
// Preliminary size of each chunk: requires further discussion
|
||||
inline constexpr std::size_t __default_chunk_size = 2048;
|
||||
|
||||
// Convenience function to determine when we should run serial.
|
||||
template <typename _Iterator, std::enable_if_t<!std::is_integral<_Iterator>::value, bool> = true>
|
||||
constexpr auto
|
||||
__should_run_serial(_Iterator __first, _Iterator __last) -> bool
|
||||
{
|
||||
using _difference_type = typename std::iterator_traits<_Iterator>::difference_type;
|
||||
auto __size = std::distance(__first, __last);
|
||||
return __size <= static_cast<_difference_type>(__default_chunk_size);
|
||||
}
|
||||
|
||||
template <typename _Index, std::enable_if_t<std::is_integral<_Index>::value, bool> = true>
|
||||
constexpr auto
|
||||
__should_run_serial(_Index __first, _Index __last) -> bool
|
||||
{
|
||||
using _difference_type = _Index;
|
||||
auto __size = __last - __first;
|
||||
return __size <= static_cast<_difference_type>(__default_chunk_size);
|
||||
}
|
||||
|
||||
struct __chunk_metrics
|
||||
{
|
||||
std::size_t __n_chunks;
|
||||
std::size_t __chunk_size;
|
||||
std::size_t __first_chunk_size;
|
||||
};
|
||||
|
||||
// The iteration space partitioner according to __requested_chunk_size
|
||||
template <class _RandomAccessIterator, class _Size = std::size_t>
|
||||
auto
|
||||
__chunk_partitioner(_RandomAccessIterator __first, _RandomAccessIterator __last,
|
||||
_Size __requested_chunk_size = __default_chunk_size) -> __chunk_metrics
|
||||
{
|
||||
/*
|
||||
* This algorithm improves distribution of elements in chunks by avoiding
|
||||
* small tail chunks. The leftover elements that do not fit neatly into
|
||||
* the chunk size are redistributed to early chunks. This improves
|
||||
* utilization of the processor's prefetch and reduces the number of
|
||||
* tasks needed by 1.
|
||||
*/
|
||||
|
||||
const _Size __n = __last - __first;
|
||||
_Size __n_chunks = 0;
|
||||
_Size __chunk_size = 0;
|
||||
_Size __first_chunk_size = 0;
|
||||
if (__n < __requested_chunk_size)
|
||||
{
|
||||
__chunk_size = __n;
|
||||
__first_chunk_size = __n;
|
||||
__n_chunks = 1;
|
||||
return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
|
||||
}
|
||||
|
||||
__n_chunks = (__n / __requested_chunk_size) + 1;
|
||||
__chunk_size = __n / __n_chunks;
|
||||
__first_chunk_size = __chunk_size;
|
||||
const _Size __n_leftover_items = __n - (__n_chunks * __chunk_size);
|
||||
|
||||
if (__n_leftover_items == __chunk_size)
|
||||
{
|
||||
__n_chunks += 1;
|
||||
return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
|
||||
}
|
||||
else if (__n_leftover_items == 0)
|
||||
{
|
||||
__first_chunk_size = __chunk_size;
|
||||
return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
|
||||
}
|
||||
|
||||
const _Size __n_extra_items_per_chunk = __n_leftover_items / __n_chunks;
|
||||
const _Size __n_final_leftover_items = __n_leftover_items - (__n_extra_items_per_chunk * __n_chunks);
|
||||
|
||||
__chunk_size += __n_extra_items_per_chunk;
|
||||
__first_chunk_size = __chunk_size + __n_final_leftover_items;
|
||||
|
||||
return __chunk_metrics{__n_chunks, __chunk_size, __first_chunk_size};
|
||||
}
|
||||
|
||||
template <typename _Iterator, typename _Index, typename _Func>
|
||||
void
|
||||
__process_chunk(const __chunk_metrics& __metrics, _Iterator __base, _Index __chunk_index, _Func __f)
|
||||
{
|
||||
auto __this_chunk_size = __chunk_index == 0 ? __metrics.__first_chunk_size : __metrics.__chunk_size;
|
||||
auto __index = __chunk_index == 0 ? 0
|
||||
: (__chunk_index * __metrics.__chunk_size) +
|
||||
(__metrics.__first_chunk_size - __metrics.__chunk_size);
|
||||
auto __first = __base + __index;
|
||||
auto __last = __first + __this_chunk_size;
|
||||
__f(__first, __last);
|
||||
}
|
||||
|
||||
} // namespace __omp_backend
|
||||
} // namespace __pstl
|
||||
|
||||
#endif // _PSTL_INTERNAL_OMP_UTIL_H
|
|
@ -24,6 +24,12 @@ namespace __pstl
|
|||
{
|
||||
namespace __par_backend = __tbb_backend;
|
||||
}
|
||||
#elif defined(_PSTL_PAR_BACKEND_OPENMP)
|
||||
# include "parallel_backend_omp.h"
|
||||
namespace __pstl
|
||||
{
|
||||
namespace __par_backend = __omp_backend;
|
||||
}
|
||||
#else
|
||||
_PSTL_PRAGMA_MESSAGE("Parallel backend was not specified");
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
// -*- C++ -*-
|
||||
// -*-===----------------------------------------------------------------------===//
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef _PSTL_PARALLEL_BACKEND_OMP_H
|
||||
#define _PSTL_PARALLEL_BACKEND_OMP_H
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// parallel_invoke
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#include "./omp/parallel_invoke.h"
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// parallel_for
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#include "./omp/parallel_for.h"
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// parallel_for_each
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#include "./omp/parallel_for_each.h"
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// parallel_reduce
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#include "./omp/parallel_reduce.h"
|
||||
#include "./omp/parallel_transform_reduce.h"
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// parallel_scan
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#include "./omp/parallel_scan.h"
|
||||
#include "./omp/parallel_transform_scan.h"
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// parallel_stable_sort
|
||||
//------------------------------------------------------------------------
|
||||
|
||||
#include "./omp/parallel_stable_partial_sort.h"
|
||||
#include "./omp/parallel_stable_sort.h"
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
// parallel_merge
|
||||
//------------------------------------------------------------------------
|
||||
#include "./omp/parallel_merge.h"
|
||||
|
||||
#endif //_PSTL_PARALLEL_BACKEND_OMP_H
|
|
@ -18,7 +18,7 @@
|
|||
#define _PSTL_VERSION_MINOR ((_PSTL_VERSION % 1000) / 10)
|
||||
#define _PSTL_VERSION_PATCH (_PSTL_VERSION % 10)
|
||||
|
||||
#if !defined(_PSTL_PAR_BACKEND_SERIAL) && !defined(_PSTL_PAR_BACKEND_TBB)
|
||||
#if !defined(_PSTL_PAR_BACKEND_SERIAL) && !defined(_PSTL_PAR_BACKEND_TBB) && !defined(_PSTL_PAR_BACKEND_OPENMP)
|
||||
# error "A parallel backend must be specified"
|
||||
#endif
|
||||
|
||||
|
@ -96,7 +96,8 @@
|
|||
#endif
|
||||
|
||||
// Should be defined to 1 for environments with a vendor implementation of C++17 execution policies
|
||||
#define _PSTL_CPP17_EXECUTION_POLICIES_PRESENT (_MSC_VER >= 1912)
|
||||
#define _PSTL_CPP17_EXECUTION_POLICIES_PRESENT (_MSC_VER >= 1912 && _MSVC_LANG >= 201703L) || \
|
||||
(_GLIBCXX_RELEASE >= 9 && __GLIBCXX__ >= 20190503 && __cplusplus >= 201703L)
|
||||
|
||||
#if (defined(_MSC_VER) && _MSC_VER >= 1900) || \
|
||||
__cplusplus >= 201300L || \
|
||||
|
|
|
@ -63,14 +63,14 @@ void __invoke_if_not(std::true_type, _Fp)
|
|||
}
|
||||
|
||||
template <typename _F1, typename _F2>
|
||||
typename std::result_of<_F1()>::type
|
||||
typename std::invoke_result<_F1()>::type
|
||||
__invoke_if_else(std::true_type, _F1 __f1, _F2)
|
||||
{
|
||||
return __f1();
|
||||
}
|
||||
|
||||
template <typename _F1, typename _F2>
|
||||
typename std::result_of<_F2()>::type
|
||||
typename std::invoke_result<_F2()>::type
|
||||
__invoke_if_else(std::false_type, _F1, _F2 __f2)
|
||||
{
|
||||
return __f2();
|
||||
|
|
Loading…
Reference in New Issue