forked from OSchip/llvm-project
[libc] Add optimized memset for AArch64
Differential Revision: https://reviews.llvm.org/D107848
This commit is contained in:
parent
904ca7d2ed
commit
8b87c3d573
|
@ -341,7 +341,7 @@ endif()
|
|||
|
||||
function(add_memset memset_name)
|
||||
add_implementation(memset ${memset_name}
|
||||
SRCS ${LIBC_SOURCE_DIR}/src/string/memset.cpp
|
||||
SRCS ${MEMSET_SRC}
|
||||
HDRS ${LIBC_SOURCE_DIR}/src/string/memset.h
|
||||
DEPENDS
|
||||
.memory_utils.memory_utils
|
||||
|
@ -353,13 +353,20 @@ function(add_memset memset_name)
|
|||
endfunction()
|
||||
|
||||
# Pick the memset source file and register its build variants for the target
# architecture. add_memset() reads MEMSET_SRC, so it has to be set before any
# add_memset() call. The generic implementation is the default; only the
# AArch64 branch below replaces it.
set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/memset.cpp)

if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
  # Variants gated on specific x86-64 feature sets.
  add_memset(memset_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
  add_memset(memset_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
  add_memset(memset_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
  add_memset(memset_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
  # Host-tuned variant and the default entry.
  add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
  add_memset(memset)
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
  # AArch64 has its own implementation file.
  set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/aarch64/memset.cpp)
  # Both variants forward "-mllvm --tail-merge-threshold=0" to the compiler.
  # NOTE(review): COMPILE_OPTIONS is given twice on the first call; how the two
  # value lists combine depends on add_implementation's argument parsing, which
  # is not visible here -- confirm both sets of flags reach the compiler.
  add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
                             COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
  add_memset(memset COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
else()
  # Any other architecture: generic source, host-tuned variant plus default.
  add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
  add_memset(memset)
endif()
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
//===-- Implementation of memset ------------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/string/memset.h"
|
||||
#include "src/__support/common.h"
|
||||
#include "src/string/memory_utils/memset_utils.h"
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
using namespace __llvm_libc::aarch64_memset;
|
||||
|
||||
inline static void AArch64Memset(char *dst, int value, size_t count) {
|
||||
if (count == 0)
|
||||
return;
|
||||
if (count <= 3) {
|
||||
SplatSet<_1>(dst, value);
|
||||
if (count > 1)
|
||||
SplatSet<Tail<_2>>(dst, value, count);
|
||||
return;
|
||||
}
|
||||
if (count <= 8)
|
||||
return SplatSet<HeadTail<_4>>(dst, value, count);
|
||||
if (count <= 16)
|
||||
return SplatSet<HeadTail<_8>>(dst, value, count);
|
||||
if (count <= 32)
|
||||
return SplatSet<HeadTail<_16>>(dst, value, count);
|
||||
if (count <= 96) {
|
||||
SplatSet<_32>(dst, value);
|
||||
if (count <= 64)
|
||||
return SplatSet<Tail<_32>>(dst, value, count);
|
||||
SplatSet<Skip<32>::Then<_32>>(dst, value);
|
||||
SplatSet<Tail<_32>>(dst, value, count);
|
||||
return;
|
||||
}
|
||||
if (count < 448 || value != 0 || !AArch64ZVA(dst, count))
|
||||
return SplatSet<Align<_16, Arg::_1>::Then<Loop<_64>>>(dst, value, count);
|
||||
}
|
||||
|
||||
// memset entry point: forwards to AArch64Memset on the byte view of `dst`
// and returns `dst` unchanged.
LLVM_LIBC_FUNCTION(void *, memset, (void *dst, int value, size_t count)) {
  char *const bytes = static_cast<char *>(dst);
  AArch64Memset(bytes, value, count);
  return dst;
}
|
||||
|
||||
} // namespace __llvm_libc
|
|
@ -18,6 +18,54 @@
|
|||
#endif
|
||||
|
||||
namespace __llvm_libc {
|
||||
namespace aarch64_memset {
|
||||
#ifdef __ARM_NEON
|
||||
struct Splat8 {
|
||||
static constexpr size_t kSize = 8;
|
||||
static void SplatSet(char *dst, const unsigned char value) {
|
||||
vst1_u8((uint8_t *)dst, vdup_n_u8(value));
|
||||
}
|
||||
};
|
||||
|
||||
struct Splat16 {
|
||||
static constexpr size_t kSize = 16;
|
||||
static void SplatSet(char *dst, const unsigned char value) {
|
||||
vst1q_u8((uint8_t *)dst, vdupq_n_u8(value));
|
||||
}
|
||||
};
|
||||
|
||||
using _8 = Splat8;
|
||||
using _16 = Splat16;
|
||||
#else
|
||||
using _8 = __llvm_libc::scalar::_8;
|
||||
using _16 = Repeated<_8, 2>;
|
||||
#endif // __ARM_NEON
|
||||
|
||||
using _1 = __llvm_libc::scalar::_1;
|
||||
using _2 = __llvm_libc::scalar::_2;
|
||||
using _3 = __llvm_libc::scalar::_3;
|
||||
using _4 = __llvm_libc::scalar::_4;
|
||||
using _32 = Chained<_16, _16>;
|
||||
using _64 = Chained<_32, _32>;
|
||||
|
||||
// Fill element that issues a single `dc zva` instruction on `dst`.
// `value` is not used by the body. kSize is fixed at 64; AArch64ZVA only
// selects this element after checking that the low five bits of DCZID_EL0
// equal 4 (per the Arm architecture: zeroing permitted, 64-byte block).
struct ZVA {
  static constexpr size_t kSize = 64;

  static void SplatSet(char *dst, const unsigned char value) {
    // The "memory" clobber tells the compiler that memory is modified.
    asm("dc zva, %[dst]"
        : /* no outputs */
        : [dst] "r"(dst)
        : "memory");
  }
};
|
||||
|
||||
// Tries to zero `count` bytes at `dst` using the ZVA element.
// Reads DCZID_EL0 and proceeds only when its low five bits equal 4; otherwise
// nothing is written. Returns true when the fill was performed, false when the
// caller must fall back to another strategy.
inline static bool AArch64ZVA(char *dst, size_t count) {
  uint64_t dczid;
  asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(dczid));

  const bool zva_usable = (dczid & 31) == 4;
  if (zva_usable) {
    // Align<_64, ...> precedes a loop of ZVA elements; _64 is the second
    // element handed to Loop (presumably used for the remainder -- confirm
    // against the Loop definition).
    SplatSet<Align<_64, Arg::_1>::Then<Loop<ZVA, _64>>>(dst, 0, count);
  }
  return zva_usable;
}
|
||||
|
||||
} // namespace aarch64_memset
|
||||
|
||||
namespace aarch64 {
|
||||
|
||||
using _1 = __llvm_libc::scalar::_1;
|
||||
|
|
Loading…
Reference in New Issue