forked from OSchip/llvm-project
[libc] Allow customization of memcpy via flags.
- Adds LLVM_LIBC_IS_DEFINED macro to libc/src/__support/common.h - Adds a few knobs to memcpy to help with experimentation: - LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB replaces the implementation with a single call to rep;movsb - LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE customizes the size from which rep;movsb is used Differential Revision: https://reviews.llvm.org/D94692
This commit is contained in:
parent
bfb8f45ef3
commit
a10300a2b2
|
@ -29,4 +29,27 @@
|
|||
#define LLVM_LIBC_FUNCTION(type, name, arglist) type name arglist
|
||||
#endif
|
||||
|
||||
namespace __llvm_libc {
namespace internal {

// Returns true when the NUL-terminated strings `lhs` and `rhs` have identical
// contents, false otherwise. Being constexpr, it can be evaluated at compile
// time when given string literals.
constexpr bool same_string(char const *lhs, char const *rhs) {
  // Advance through both strings in lockstep until both terminators are
  // reached. If one string ends first, its NUL differs from the other string's
  // current character, so the mismatch check returns before either pointer is
  // moved past its terminator.
  for (; *lhs || *rhs; ++lhs, ++rhs)
    if (*lhs != *rhs)
      return false;
  return true;
}

} // namespace internal
} // namespace __llvm_libc
|
||||
|
||||
// LLVM_LIBC_IS_DEFINED checks whether a particular macro is defined.
|
||||
// Usage: constexpr bool kUseAvx = LLVM_LIBC_IS_DEFINED(__AVX__);
|
||||
//
|
||||
// This works by comparing the stringified version of the macro with and without
|
||||
// evaluation. If FOO is not defined both stringifications yield "FOO". If FOO
|
||||
// is defined, one stringification yields "FOO" while the other yields its
|
||||
// stringified value "1".
|
||||
// Evaluates to true when `macro` is defined (to anything other than its own
// name). `#macro` stringizes the argument without expanding it, while the
// helper below stringizes it after expansion; the two strings differ only if
// the preprocessor replaced the name with something else. The expansion is
// parenthesized so it composes safely inside larger expressions.
#define LLVM_LIBC_IS_DEFINED(macro)                                            \
  (!__llvm_libc::internal::same_string(                                        \
      LLVM_LIBC_IS_DEFINED__EVAL_AND_STRINGIZE(macro), #macro))
// Helper for LLVM_LIBC_IS_DEFINED: the extra level of indirection lets the
// forwarded argument be macro-expanded before `#s` turns it into a string.
// When invoked directly, the argument is stringized as written.
#define LLVM_LIBC_IS_DEFINED__EVAL_AND_STRINGIZE(s) #s
|
||||
|
||||
#endif // LLVM_LIBC_SUPPORT_COMMON_H
|
||||
|
|
|
@ -12,6 +12,26 @@
|
|||
|
||||
namespace __llvm_libc {
|
||||
|
||||
// Whether to use only rep;movsb.
|
||||
constexpr bool kUseOnlyRepMovsb =
|
||||
LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB);
|
||||
|
||||
// kRepMovsBSize == -1 : Only CopyAligned is used.
|
||||
// kRepMovsBSize == 0 : Only RepMovsb is used.
|
||||
// else CopyAligned is used up to kRepMovsBSize and then RepMovsb.
|
||||
constexpr size_t kRepMovsBSize =
|
||||
#ifdef LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
|
||||
LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE;
|
||||
#else
|
||||
-1;
|
||||
#endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
|
||||
|
||||
// Whether target supports AVX instructions.
|
||||
constexpr bool kHasAvx = LLVM_LIBC_IS_DEFINED(__AVX__);
|
||||
|
||||
// The chunk size used for the loop copy strategy.
|
||||
constexpr size_t kLoopCopyBlockSize = kHasAvx ? 64 : 32;
|
||||
|
||||
static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
|
||||
size_t count) {
|
||||
// FIXME: Add MSVC support with
|
||||
|
@ -21,12 +41,6 @@ static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
|
|||
asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory");
|
||||
}
|
||||
|
||||
#if defined(__AVX__)
|
||||
#define BEST_SIZE 64
|
||||
#else
|
||||
#define BEST_SIZE 32
|
||||
#endif
|
||||
|
||||
// Design rationale
|
||||
// ================
|
||||
//
|
||||
|
@ -47,6 +61,9 @@ static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
|
|||
// with little change on the code side.
|
||||
static void memcpy_x86(char *__restrict dst, const char *__restrict src,
|
||||
size_t count) {
|
||||
if (kUseOnlyRepMovsb)
|
||||
return CopyRepMovsb(dst, src, count);
|
||||
|
||||
if (count == 0)
|
||||
return;
|
||||
if (count == 1)
|
||||
|
@ -67,16 +84,10 @@ static void memcpy_x86(char *__restrict dst, const char *__restrict src,
|
|||
return CopyBlockOverlap<32>(dst, src, count);
|
||||
if (count < 128)
|
||||
return CopyBlockOverlap<64>(dst, src, count);
|
||||
#if defined(__AVX__)
|
||||
if (count < 256)
|
||||
if (kHasAvx && count < 256)
|
||||
return CopyBlockOverlap<128>(dst, src, count);
|
||||
#endif
|
||||
// kRepMovsBSize == -1 : Only CopyAligned is used.
|
||||
// kRepMovsBSize == 0 : Only RepMovsb is used.
|
||||
// else CopyAligned is used up to kRepMovsBSize and then RepMovsb.
|
||||
constexpr size_t kRepMovsBSize = -1;
|
||||
if (count <= kRepMovsBSize)
|
||||
return CopyAlignedBlocks<BEST_SIZE>(dst, src, count);
|
||||
return CopyAlignedBlocks<kLoopCopyBlockSize>(dst, src, count);
|
||||
return CopyRepMovsb(dst, src, count);
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue