[libc] Add optimized memset for AArch64

Differential Revision: https://reviews.llvm.org/D107848
This commit is contained in:
Andre Vieira 2021-09-23 09:19:47 +01:00
parent 904ca7d2ed
commit 8b87c3d573
3 changed files with 105 additions and 1 deletions

View File

@ -341,7 +341,7 @@ endif()
function(add_memset memset_name)
add_implementation(memset ${memset_name}
SRCS ${LIBC_SOURCE_DIR}/src/string/memset.cpp
SRCS ${MEMSET_SRC}
HDRS ${LIBC_SOURCE_DIR}/src/string/memset.h
DEPENDS
.memory_utils.memory_utils
@ -353,13 +353,20 @@ function(add_memset memset_name)
endfunction()
# Choose the memset source file and the list of implementation variants to
# build for the target architecture. MEMSET_SRC must be set before each group
# of add_memset() calls because add_memset() passes it on as SRCS.
if(${LIBC_TARGET_ARCHITECTURE_IS_X86})
# x86: the generic source is built once per -march level listed below.
# REQUIRE names a CPU feature for each variant; how it is enforced is up to
# add_memset()/add_implementation(), which are not shown here.
set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/memset.cpp)
add_memset(memset_x86_64_opt_sse2 COMPILE_OPTIONS -march=k8 REQUIRE SSE2)
add_memset(memset_x86_64_opt_sse4 COMPILE_OPTIONS -march=nehalem REQUIRE SSE4_2)
add_memset(memset_x86_64_opt_avx2 COMPILE_OPTIONS -march=haswell REQUIRE AVX2)
add_memset(memset_x86_64_opt_avx512 COMPILE_OPTIONS -march=skylake-avx512 REQUIRE AVX512F)
add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
add_memset(memset)
elseif(${LIBC_TARGET_ARCHITECTURE_IS_AARCH64})
# AArch64: use the dedicated aarch64/memset.cpp implementation.
# Both variants pass "-mllvm --tail-merge-threshold=0" to the compiler; the
# "SHELL:" prefix is CMake's syntax for keeping the two words together as one
# option group (assuming they end up in a target's compile options).
# NOTE(review): presumably this stops LLVM from tail-merging the per-size
# branches of the implementation - confirm against the review thread.
# NOTE(review): COMPILE_OPTIONS appears twice in the memset_opt_host call;
# whether both groups survive depends on how add_memset() parses its
# arguments - confirm the native options are not dropped.
set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/aarch64/memset.cpp)
add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE}
COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
add_memset(memset COMPILE_OPTIONS "SHELL:-mllvm --tail-merge-threshold=0")
else()
# Any other architecture: generic source, host-tuned and default variants only.
set(MEMSET_SRC ${LIBC_SOURCE_DIR}/src/string/memset.cpp)
add_memset(memset_opt_host COMPILE_OPTIONS ${LIBC_COMPILE_OPTIONS_NATIVE})
add_memset(memset)
endif()

View File

@ -0,0 +1,49 @@
//===-- Implementation of memset ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "src/string/memset.h"
#include "src/__support/common.h"
#include "src/string/memory_utils/memset_utils.h"
namespace __llvm_libc {
using namespace __llvm_libc::aarch64_memset;
// Fills `count` bytes starting at `dst` with the low byte of `value`.
//
// The store strategy is chosen by size bucket: 0, 1-3, 4-8, 9-16, 17-32,
// 33-96 and >96 bytes. The Tail/HeadTail/Skip/Align/Loop combinators are
// defined elsewhere; from their use here they are expected to issue
// fixed-size (possibly overlapping) stores that together cover exactly
// [dst, dst + count).
//
// For fills of at least 448 bytes whose byte value is zero, AArch64ZVA() is
// tried first; it performs the whole fill itself and returns true when the
// `dc zva` path is usable. In every other case the generic 64-byte loop runs.
inline static void AArch64Memset(char *dst, int value, size_t count) {
  // memset semantics store `value` converted to unsigned char. Convert once
  // so that the zero test below looks at the byte that is actually written:
  // an int such as 0x100 is non-zero but still produces a zero fill.
  const unsigned char byte = static_cast<unsigned char>(value);

  if (count == 0)
    return;

  if (count <= 3) {
    // One byte at the front, then (for 2 or 3 bytes) a 2-byte tail store.
    SplatSet<_1>(dst, byte);
    if (count > 1)
      SplatSet<Tail<_2>>(dst, byte, count);
    return;
  }

  if (count <= 8)
    return SplatSet<HeadTail<_4>>(dst, byte, count);
  if (count <= 16)
    return SplatSet<HeadTail<_8>>(dst, byte, count);
  if (count <= 32)
    return SplatSet<HeadTail<_16>>(dst, byte, count);

  if (count <= 96) {
    // First 32 bytes unconditionally; 33-64 bytes need only a 32-byte tail,
    // 65-96 bytes need a second 32-byte block before the tail.
    SplatSet<_32>(dst, byte);
    if (count <= 64)
      return SplatSet<Tail<_32>>(dst, byte, count);
    SplatSet<Skip<32>::Then<_32>>(dst, byte);
    SplatSet<Tail<_32>>(dst, byte, count);
    return;
  }

  // Short-circuit order matters: AArch64ZVA() has the side effect of doing
  // the fill, so it must only be evaluated for large zero fills.
  if (count < 448 || byte != 0 || !AArch64ZVA(dst, count))
    return SplatSet<Align<_16, Arg::_1>::Then<Loop<_64>>>(dst, byte, count);
}
// Public memset entry point: forwards to AArch64Memset on the destination
// viewed as a char buffer, then returns the original destination pointer.
LLVM_LIBC_FUNCTION(void *, memset, (void *dst, int value, size_t count)) {
  char *const bytes = static_cast<char *>(dst);
  AArch64Memset(bytes, value, count);
  return dst;
}
} // namespace __llvm_libc

View File

@ -18,6 +18,54 @@
#endif
namespace __llvm_libc {
namespace aarch64_memset {
#ifdef __ARM_NEON
// 8-byte element for NEON builds: duplicates `value` across an 8-lane byte
// vector and stores that vector at `dst`.
struct Splat8 {
  static constexpr size_t kSize = 8;

  static void SplatSet(char *dst, const unsigned char value) {
    const auto splat = vdup_n_u8(value);
    vst1_u8(reinterpret_cast<uint8_t *>(dst), splat);
  }
};
// 16-byte element for NEON builds: duplicates `value` across a 16-lane byte
// vector and stores that vector at `dst`.
struct Splat16 {
  static constexpr size_t kSize = 16;

  static void SplatSet(char *dst, const unsigned char value) {
    const auto splat = vdupq_n_u8(value);
    vst1q_u8(reinterpret_cast<uint8_t *>(dst), splat);
  }
};
// With NEON available, the 8- and 16-byte elements are the vector-store
// structs defined above.
using _8 = Splat8;
using _16 = Splat16;
#else
// Without NEON, fall back to the scalar 8-byte element and build the 16-byte
// element from two of them.
using _8 = __llvm_libc::scalar::_8;
using _16 = Repeated<_8, 2>;
#endif // __ARM_NEON
// Elements smaller than 8 bytes always come from the scalar implementation.
using _1 = __llvm_libc::scalar::_1;
using _2 = __llvm_libc::scalar::_2;
using _3 = __llvm_libc::scalar::_3;
using _4 = __llvm_libc::scalar::_4;
// Larger elements are composed from two of the next smaller element.
using _32 = Chained<_16, _16>;
using _64 = Chained<_32, _32>;
// Element that issues one `dc zva` (Data Cache Zero by VA) instruction on the
// address `dst`; kSize declares the block it covers as 64 bytes.
// The `value` parameter is not used by the body, so this element is only
// suitable for zero fills (its one caller in this file passes 0).
// NOTE(review): 64 bytes is only the real block size when DCZID_EL0 reports
// it; AArch64ZVA() checks that before using this element.
struct ZVA {
static constexpr size_t kSize = 64;
static void SplatSet(char *dst, const unsigned char value) {
// The "memory" clobber tells the compiler this asm writes memory it cannot
// see, so surrounding loads/stores are not reordered or cached across it.
asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory");
}
};
// Attempts to zero `count` bytes at `dst` using `dc zva`.
//
// Reads the DCZID_EL0 system register and proceeds only when its low five
// bits equal 4. Per the Arm architecture reference, that means zeroing is
// permitted (bit 4 clear) and the block size field encodes 64 bytes, matching
// ZVA::kSize. On success the whole range is filled (an aligning 64-byte store
// followed by a ZVA loop with a 64-byte tail element) and true is returned.
// Otherwise nothing is written and false is returned so the caller can fall
// back to ordinary stores.
//
// The caller in this file only invokes this for count >= 448.
inline static bool AArch64ZVA(char *dst, size_t count) {
  uint64_t dczid;
  asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(dczid));

  const bool zva_usable = (dczid & 31) == 4;
  if (zva_usable)
    SplatSet<Align<_64, Arg::_1>::Then<Loop<ZVA, _64>>>(dst, 0, count);
  return zva_usable;
}
} // namespace aarch64_memset
namespace aarch64 {
using _1 = __llvm_libc::scalar::_1;