[libc] Allow customization of memcpy via flags.

- Adds LLVM_LIBC_IS_DEFINED macro to libc/src/__support/common.h
- Adds a few knobs to memcpy to help with experimentation:
   - LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB replaces the implementation with a single call to rep;movsb
   - LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE customizes the size from which rep;movsb is used

Differential Revision: https://reviews.llvm.org/D94692
This commit is contained in:
Guillaume Chatelet 2021-01-15 09:26:45 +00:00
parent bfb8f45ef3
commit a10300a2b2
2 changed files with 48 additions and 14 deletions

View File

@ -29,4 +29,27 @@
#define LLVM_LIBC_FUNCTION(type, name, arglist) type name arglist
#endif
namespace __llvm_libc {
namespace internal {

// Compile-time comparison of two NUL-terminated strings.
// Returns true only when both strings hold the same characters and end at the
// same position; returns false at the first position where they differ
// (including the case where one string is a strict prefix of the other).
constexpr bool same_string(char const *lhs, char const *rhs) {
  // Advance while the current characters agree; reaching the terminator on
  // both sides at once means every character matched.
  while (*lhs == *rhs) {
    if (*lhs == '\0')
      return true;
    ++lhs;
    ++rhs;
  }
  return false;
}

} // namespace internal
} // namespace __llvm_libc
// Helper for LLVM_LIBC_IS_DEFINED: stringizes its argument. When invoked from
// another macro, the argument is macro-expanded first, so the resulting string
// is the expansion of the name rather than the name itself.
#define LLVM_LIBC_IS_DEFINED__EVAL_AND_STRINGIZE(s) #s

// LLVM_LIBC_IS_DEFINED(macro) yields a constant boolean expression telling
// whether `macro` is defined.
// Usage: constexpr bool kUseAvx = LLVM_LIBC_IS_DEFINED(__AVX__);
//
// How it works: the name is stringized twice, once after macro expansion and
// once verbatim, and the two strings are compared.
//  - If FOO is not defined, both stringizations yield "FOO", so the strings
//    match and the result is false.
//  - If FOO is defined, the verbatim form yields "FOO" while the expanded form
//    yields its stringized value (e.g. "1"), so the strings differ and the
//    result is true.
#define LLVM_LIBC_IS_DEFINED(macro)                                            \
  (!__llvm_libc::internal::same_string(                                        \
      LLVM_LIBC_IS_DEFINED__EVAL_AND_STRINGIZE(macro), #macro))
#endif // LLVM_LIBC_SUPPORT_COMMON_H

View File

@ -12,6 +12,26 @@
namespace __llvm_libc {
// True when LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB is defined at build time;
// selects an implementation that uses only rep;movsb.
constexpr bool kUseOnlyRepMovsb =
    LLVM_LIBC_IS_DEFINED(LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB);

// Size threshold separating the CopyAligned and RepMovsb strategies:
//   kRepMovsBSize == -1 : only CopyAligned is used.
//   kRepMovsBSize == 0  : only RepMovsb is used.
//   otherwise           : CopyAligned is used up to kRepMovsBSize, and
//                         RepMovsb beyond that.
// Taken from LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE when that macro is
// defined; otherwise defaults to -1 (which converts to the largest size_t).
#ifdef LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
constexpr size_t kRepMovsBSize = LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE;
#else
constexpr size_t kRepMovsBSize = -1;
#endif // LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE

// True when the target supports AVX instructions (__AVX__ is defined).
constexpr bool kHasAvx = LLVM_LIBC_IS_DEFINED(__AVX__);

// Chunk size used by the loop copy strategy: 64 with AVX, 32 without.
constexpr size_t kLoopCopyBlockSize = kHasAvx ? 64 : 32;
static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
size_t count) {
// FIXME: Add MSVC support with
@ -21,12 +41,6 @@ static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
asm volatile("rep movsb" : "+D"(dst), "+S"(src), "+c"(count) : : "memory");
}
#if defined(__AVX__)
#define BEST_SIZE 64
#else
#define BEST_SIZE 32
#endif
// Design rationale
// ================
//
@ -47,6 +61,9 @@ static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
// with little change on the code side.
static void memcpy_x86(char *__restrict dst, const char *__restrict src,
size_t count) {
if (kUseOnlyRepMovsb)
return CopyRepMovsb(dst, src, count);
if (count == 0)
return;
if (count == 1)
@ -67,16 +84,10 @@ static void memcpy_x86(char *__restrict dst, const char *__restrict src,
return CopyBlockOverlap<32>(dst, src, count);
if (count < 128)
return CopyBlockOverlap<64>(dst, src, count);
#if defined(__AVX__)
if (count < 256)
if (kHasAvx && count < 256)
return CopyBlockOverlap<128>(dst, src, count);
#endif
// kRepMovsBSize == -1 : Only CopyAligned is used.
// kRepMovsBSize == 0 : Only RepMovsb is used.
// else CopyAligned is used to to kRepMovsBSize and then RepMovsb.
constexpr size_t kRepMovsBSize = -1;
if (count <= kRepMovsBSize)
return CopyAlignedBlocks<BEST_SIZE>(dst, src, count);
return CopyAlignedBlocks<kLoopCopyBlockSize>(dst, src, count);
return CopyRepMovsb(dst, src, count);
}