forked from OSchip/llvm-project
Revert "[libc] Add a set of elementary operations"
This reverts commit 4694321fbe
.
This commit is contained in:
parent
4694321fbe
commit
c3242238b7
|
@ -194,7 +194,7 @@ function(add_implementation name impl_name)
|
|||
SRCS ${ADD_IMPL_SRCS}
|
||||
HDRS ${ADD_IMPL_HDRS}
|
||||
DEPENDS ${ADD_IMPL_DEPENDS}
|
||||
COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS} "SHELL:-mllvm -combiner-global-alias-analysis"
|
||||
COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS}
|
||||
)
|
||||
get_fq_target_name(${impl_name} fq_target_name)
|
||||
set_target_properties(${fq_target_name} PROPERTIES REQUIRE_CPU_FEATURES "${ADD_IMPL_REQUIRE}")
|
||||
|
|
|
@ -8,19 +8,10 @@
|
|||
|
||||
#include "src/string/memcpy.h"
|
||||
#include "src/__support/common.h"
|
||||
#include "src/string/memory_utils/elements.h"
|
||||
#include "src/string/memory_utils/memcpy_utils.h"
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
using _1 = scalar::UINT8;
|
||||
using _2 = scalar::UINT16;
|
||||
using _3 = Chained<scalar::UINT16, scalar::UINT8>;
|
||||
using _4 = scalar::UINT32;
|
||||
using _8 = scalar::UINT64;
|
||||
using _16 = Repeated<scalar::UINT64, 2>;
|
||||
using _32 = Repeated<scalar::UINT64, 4>;
|
||||
using _64 = Repeated<scalar::UINT64, 8>;
|
||||
|
||||
// Design rationale
|
||||
// ================
|
||||
//
|
||||
|
@ -46,24 +37,24 @@ static void memcpy_aarch64(char *__restrict dst, const char *__restrict src,
|
|||
if (count == 0)
|
||||
return;
|
||||
if (count == 1)
|
||||
return Copy<_1>(dst, src);
|
||||
return CopyBlock<1>(dst, src);
|
||||
if (count == 2)
|
||||
return Copy<_2>(dst, src);
|
||||
return CopyBlock<2>(dst, src);
|
||||
if (count == 3)
|
||||
return Copy<_3>(dst, src);
|
||||
return CopyBlock<3>(dst, src);
|
||||
if (count == 4)
|
||||
return Copy<_4>(dst, src);
|
||||
return CopyBlock<4>(dst, src);
|
||||
if (count < 8)
|
||||
return Copy<HeadTail<_4>>(dst, src, count);
|
||||
return CopyBlockOverlap<4>(dst, src, count);
|
||||
if (count < 16)
|
||||
return Copy<HeadTail<_8>>(dst, src, count);
|
||||
return CopyBlockOverlap<8>(dst, src, count);
|
||||
if (count < 32)
|
||||
return Copy<HeadTail<_16>>(dst, src, count);
|
||||
return CopyBlockOverlap<16>(dst, src, count);
|
||||
if (count < 64)
|
||||
return Copy<HeadTail<_32>>(dst, src, count);
|
||||
return CopyBlockOverlap<32>(dst, src, count);
|
||||
if (count < 128)
|
||||
return Copy<HeadTail<_64>>(dst, src, count);
|
||||
return Copy<Align<_16, Arg::Src>::Then<Loop<_64>>>(dst, src, count);
|
||||
return CopyBlockOverlap<64>(dst, src, count);
|
||||
return CopySrcAlignedBlocks<64, 16>(dst, src, count);
|
||||
}
|
||||
|
||||
LLVM_LIBC_FUNCTION(void *, memcpy,
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
#include "src/string/memcpy.h"
|
||||
#include "src/__support/common.h"
|
||||
#include "src/string/memory_utils/elements.h"
|
||||
#include "src/string/memory_utils/memcpy_utils.h"
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
|
@ -32,30 +32,27 @@ namespace __llvm_libc {
|
|||
// with little change on the code side.
|
||||
static void memcpy_impl(char *__restrict dst, const char *__restrict src,
|
||||
size_t count) {
|
||||
// Use scalar strategies (_1, _2, _3 ...)
|
||||
using namespace __llvm_libc::scalar;
|
||||
|
||||
if (count == 0)
|
||||
return;
|
||||
if (count == 1)
|
||||
return Copy<_1>(dst, src);
|
||||
return CopyBlock<1>(dst, src);
|
||||
if (count == 2)
|
||||
return Copy<_2>(dst, src);
|
||||
return CopyBlock<2>(dst, src);
|
||||
if (count == 3)
|
||||
return Copy<_3>(dst, src);
|
||||
return CopyBlock<3>(dst, src);
|
||||
if (count == 4)
|
||||
return Copy<_4>(dst, src);
|
||||
return CopyBlock<4>(dst, src);
|
||||
if (count < 8)
|
||||
return Copy<HeadTail<_4>>(dst, src, count);
|
||||
return CopyBlockOverlap<4>(dst, src, count);
|
||||
if (count < 16)
|
||||
return Copy<HeadTail<_8>>(dst, src, count);
|
||||
return CopyBlockOverlap<8>(dst, src, count);
|
||||
if (count < 32)
|
||||
return Copy<HeadTail<_16>>(dst, src, count);
|
||||
return CopyBlockOverlap<16>(dst, src, count);
|
||||
if (count < 64)
|
||||
return Copy<HeadTail<_32>>(dst, src, count);
|
||||
return CopyBlockOverlap<32>(dst, src, count);
|
||||
if (count < 128)
|
||||
return Copy<HeadTail<_64>>(dst, src, count);
|
||||
return Copy<Align<_32, Arg::Src>::Then<Loop<_32>>>(dst, src, count);
|
||||
return CopyBlockOverlap<64>(dst, src, count);
|
||||
return CopySrcAlignedBlocks<32>(dst, src, count);
|
||||
}
|
||||
|
||||
LLVM_LIBC_FUNCTION(void *, memcpy,
|
||||
|
|
|
@ -2,5 +2,6 @@ add_header_library(
|
|||
memory_utils
|
||||
HDRS
|
||||
utils.h
|
||||
elements.h
|
||||
memcpy_utils.h
|
||||
memset_utils.h
|
||||
)
|
||||
|
|
|
@ -1,499 +0,0 @@
|
|||
//===-- Elementary operations to compose memory primitives ----------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
|
||||
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
|
||||
|
||||
#include <stddef.h> // size_t
|
||||
#include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
|
||||
|
||||
#include "src/__support/endian.h"
|
||||
#include "src/string/memory_utils/utils.h"
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
// Elementary Operations
|
||||
// --------------------------------
|
||||
// We define abstract elementary operations acting on fixed chunks of memory.
|
||||
// These are low level building blocks that are meant to be assembled to compose
|
||||
// higher order abstractions. Each function is defined twice: once with
|
||||
// fixed-size operations, and once with runtime-size operations.
|
||||
|
||||
// Fixed-size copies from 'src' to 'dst'.
|
||||
template <typename Element>
|
||||
void Copy(char *__restrict dst, const char *__restrict src) {
|
||||
Element::Copy(dst, src);
|
||||
}
|
||||
// Runtime-size copies from 'src' to 'dst'.
|
||||
template <typename Element>
|
||||
void Copy(char *__restrict dst, const char *__restrict src, size_t size) {
|
||||
Element::Copy(dst, src, size);
|
||||
}
|
||||
|
||||
// Fixed-size equality between 'lhs' and 'rhs'.
|
||||
template <typename Element> bool Equals(const char *lhs, const char *rhs) {
|
||||
return Element::Equals(lhs, rhs);
|
||||
}
|
||||
// Runtime-size equality between 'lhs' and 'rhs'.
|
||||
template <typename Element>
|
||||
bool Equals(const char *lhs, const char *rhs, size_t size) {
|
||||
return Element::Equals(lhs, rhs, size);
|
||||
}
|
||||
|
||||
// Fixed-size three-way comparison between 'lhs' and 'rhs'.
|
||||
template <typename Element>
|
||||
int ThreeWayCompare(const char *lhs, const char *rhs) {
|
||||
return Element::ThreeWayCompare(lhs, rhs);
|
||||
}
|
||||
// Runtime-size three-way comparison between 'lhs' and 'rhs'.
|
||||
template <typename Element>
|
||||
int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
|
||||
return Element::ThreeWayCompare(lhs, rhs, size);
|
||||
}
|
||||
|
||||
// Fixed-size initialization.
|
||||
template <typename Element>
|
||||
void SplatSet(char *dst, const unsigned char value) {
|
||||
Element::SplatSet(dst, value);
|
||||
}
|
||||
// Runtime-size initialization.
|
||||
template <typename Element>
|
||||
void SplatSet(char *dst, const unsigned char value, size_t size) {
|
||||
Element::SplatSet(dst, value, size);
|
||||
}
|
||||
|
||||
// Fixed-size Higher-Order Operations
|
||||
// ----------------------------------
|
||||
// - Repeated<Type, ElementCount>: Repeat the operation several times in a row.
|
||||
// - Chained<Types...>: Chain the operation of several types.
|
||||
|
||||
// Repeat the operation several times in a row.
|
||||
template <typename Element, size_t ElementCount> struct Repeated {
|
||||
static constexpr size_t kSize = ElementCount * Element::kSize;
|
||||
|
||||
static void Copy(char *__restrict dst, const char *__restrict src) {
|
||||
for (size_t i = 0; i < ElementCount; ++i) {
|
||||
const size_t offset = i * Element::kSize;
|
||||
Element::Copy(dst + offset, src + offset);
|
||||
}
|
||||
}
|
||||
|
||||
static bool Equals(const char *lhs, const char *rhs) {
|
||||
for (size_t i = 0; i < ElementCount; ++i) {
|
||||
const size_t offset = i * Element::kSize;
|
||||
if (!Element::Equals(lhs + offset, rhs + offset))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static int ThreeWayCompare(const char *lhs, const char *rhs) {
|
||||
for (size_t i = 0; i < ElementCount; ++i) {
|
||||
const size_t offset = i * Element::kSize;
|
||||
// We make the assumption that 'Equals' si cheaper than 'ThreeWayCompare'.
|
||||
if (Element::Equals(lhs + offset, rhs + offset))
|
||||
continue;
|
||||
return Element::ThreeWayCompare(lhs + offset, rhs + offset);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void SplatSet(char *dst, const unsigned char value) {
|
||||
for (size_t i = 0; i < ElementCount; ++i) {
|
||||
const size_t offset = i * Element::kSize;
|
||||
Element::SplatSet(dst + offset, value);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Chain the operation of several types.
|
||||
// For instance, to handle a 3 bytes operation, one can use:
|
||||
// Chained<UINT16, UINT8>::Operation();
|
||||
template <typename... Types> struct Chained;
|
||||
|
||||
template <typename Head, typename... Tail> struct Chained<Head, Tail...> {
|
||||
static constexpr size_t kSize = Head::kSize + Chained<Tail...>::kSize;
|
||||
|
||||
static void Copy(char *__restrict dst, const char *__restrict src) {
|
||||
Chained<Tail...>::Copy(dst + Head::kSize, src + Head::kSize);
|
||||
__llvm_libc::Copy<Head>(dst, src);
|
||||
}
|
||||
|
||||
static bool Equals(const char *lhs, const char *rhs) {
|
||||
if (!__llvm_libc::Equals<Head>(lhs, rhs))
|
||||
return false;
|
||||
return Chained<Tail...>::Equals(lhs + Head::kSize, rhs + Head::kSize);
|
||||
}
|
||||
|
||||
static int ThreeWayCompare(const char *lhs, const char *rhs) {
|
||||
if (__llvm_libc::Equals<Head>(lhs, rhs))
|
||||
return Chained<Tail...>::ThreeWayCompare(lhs + Head::kSize,
|
||||
rhs + Head::kSize);
|
||||
return __llvm_libc::ThreeWayCompare<Head>(lhs, rhs);
|
||||
}
|
||||
|
||||
static void SplatSet(char *dst, const unsigned char value) {
|
||||
Chained<Tail...>::SplatSet(dst + Head::kSize, value);
|
||||
__llvm_libc::SplatSet<Head>(dst, value);
|
||||
}
|
||||
};
|
||||
|
||||
template <> struct Chained<> {
|
||||
static constexpr size_t kSize = 0;
|
||||
static void Copy(char *__restrict dst, const char *__restrict src) {}
|
||||
static bool Equals(const char *lhs, const char *rhs) { return true; }
|
||||
static int ThreeWayCompare(const char *lhs, const char *rhs) { return 0; }
|
||||
static void SplatSet(char *dst, const unsigned char value) {}
|
||||
};
|
||||
|
||||
// Runtime-size Higher-Order Operations
|
||||
// ------------------------------------
|
||||
// - Tail<T>: Perform the operation on the last 'T::kSize' bytes of the buffer.
|
||||
// - HeadTail<T>: Perform the operation on the first and last 'T::kSize' bytes
|
||||
// of the buffer.
|
||||
// - Loop<T>: Perform a loop of fixed-sized operations.
|
||||
|
||||
// Perform the operation on the last 'T::kSize' bytes of the buffer.
|
||||
//
|
||||
// e.g. with
|
||||
// [1234567812345678123]
|
||||
// [__XXXXXXXXXXXXXX___]
|
||||
// [________XXXXXXXX___]
|
||||
//
|
||||
// Precondition: `size >= T::kSize`.
|
||||
template <typename T> struct Tail {
|
||||
static void Copy(char *__restrict dst, const char *__restrict src,
|
||||
size_t size) {
|
||||
return T::Copy(dst + offset(size), src + offset(size));
|
||||
}
|
||||
|
||||
static bool Equals(const char *lhs, const char *rhs, size_t size) {
|
||||
return T::Equals(lhs + offset(size), rhs + offset(size));
|
||||
}
|
||||
|
||||
static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
|
||||
return T::ThreeWayCompare(lhs + offset(size), rhs + offset(size));
|
||||
}
|
||||
|
||||
static void SplatSet(char *dst, const unsigned char value, size_t size) {
|
||||
return T::SplatSet(dst + offset(size), value);
|
||||
}
|
||||
|
||||
static size_t offset(size_t size) { return size - T::kSize; }
|
||||
};
|
||||
|
||||
// Perform the operation on the first and last 'T::kSize' bytes of the buffer.
|
||||
// This is useful for overlapping operations.
|
||||
//
|
||||
// e.g. with
|
||||
// [1234567812345678123]
|
||||
// [__XXXXXXXXXXXXXX___]
|
||||
// [__XXXXXXXX_________]
|
||||
// [________XXXXXXXX___]
|
||||
//
|
||||
// Precondition: `size >= T::kSize && size <= 2 x T::kSize`.
|
||||
template <typename T> struct HeadTail {
|
||||
static void Copy(char *__restrict dst, const char *__restrict src,
|
||||
size_t size) {
|
||||
T::Copy(dst, src);
|
||||
Tail<T>::Copy(dst, src, size);
|
||||
}
|
||||
|
||||
static bool Equals(const char *lhs, const char *rhs, size_t size) {
|
||||
if (!T::Equals(lhs, rhs))
|
||||
return false;
|
||||
return Tail<T>::Equals(lhs, rhs, size);
|
||||
}
|
||||
|
||||
static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
|
||||
if (const int result = T::ThreeWayCompare(lhs, rhs))
|
||||
return result;
|
||||
return Tail<T>::ThreeWayCompare(lhs, rhs, size);
|
||||
}
|
||||
|
||||
static void SplatSet(char *dst, const unsigned char value, size_t size) {
|
||||
T::SplatSet(dst, value);
|
||||
Tail<T>::SplatSet(dst, value, size);
|
||||
}
|
||||
};
|
||||
|
||||
// Simple loop ending with a Tail operation.
|
||||
//
|
||||
// e.g. with
|
||||
// [12345678123456781234567812345678]
|
||||
// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
|
||||
// [__XXXXXXXX_______________________]
|
||||
// [__________XXXXXXXX_______________]
|
||||
// [__________________XXXXXXXX_______]
|
||||
// [______________________XXXXXXXX___]
|
||||
//
|
||||
// Precondition:
|
||||
// - size >= T::kSize
|
||||
template <typename T> struct Loop {
|
||||
static void Copy(char *__restrict dst, const char *__restrict src,
|
||||
size_t size) {
|
||||
for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
|
||||
T::Copy(dst + offset, src + offset);
|
||||
Tail<T>::Copy(dst, src, size);
|
||||
}
|
||||
|
||||
static bool Equals(const char *lhs, const char *rhs, size_t size) {
|
||||
for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
|
||||
if (!T::Equals(lhs + offset, rhs + offset))
|
||||
return false;
|
||||
return Tail<T>::Equals(lhs, rhs, size);
|
||||
}
|
||||
|
||||
static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
|
||||
for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
|
||||
if (const int result = T::ThreeWayCompare(lhs + offset, rhs + offset))
|
||||
return result;
|
||||
return Tail<T>::ThreeWayCompare(lhs, rhs, size);
|
||||
}
|
||||
|
||||
static void SplatSet(char *dst, const unsigned char value, size_t size) {
|
||||
for (size_t offset = 0; offset < size - T::kSize; offset += T::kSize)
|
||||
T::SplatSet(dst + offset, value);
|
||||
Tail<T>::SplatSet(dst, value, size);
|
||||
}
|
||||
};
|
||||
|
||||
enum class Arg { _1, _2, Dst = _1, Src = _2, Lhs = _1, Rhs = _2 };
|
||||
|
||||
namespace internal {
|
||||
|
||||
// Provides a specialized Bump function that adjusts pointers and size so first
|
||||
// argument (resp. second argument) gets aligned to Alignment.
|
||||
// We make sure the compiler knows about the adjusted pointer alignment.
|
||||
template <Arg arg, size_t Alignment> struct AlignHelper {};
|
||||
|
||||
template <size_t Alignment> struct AlignHelper<Arg::_1, Alignment> {
|
||||
template <typename T1, typename T2>
|
||||
static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
|
||||
const intptr_t offset = offset_to_next_aligned<Alignment>(p1ref);
|
||||
p1ref += offset;
|
||||
p2ref += offset;
|
||||
size -= offset;
|
||||
p1ref = assume_aligned<Alignment>(p1ref);
|
||||
}
|
||||
};
|
||||
|
||||
template <size_t Alignment> struct AlignHelper<Arg::_2, Alignment> {
|
||||
template <typename T1, typename T2>
|
||||
static void Bump(T1 *__restrict &p1ref, T2 *__restrict &p2ref, size_t &size) {
|
||||
const intptr_t offset = offset_to_next_aligned<Alignment>(p2ref);
|
||||
p1ref += offset;
|
||||
p2ref += offset;
|
||||
size -= offset;
|
||||
p2ref = assume_aligned<Alignment>(p2ref);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace internal
|
||||
|
||||
// An alignment operation that:
|
||||
// - executes the 'AlignmentT' operation
|
||||
// - bumps 'dst' or 'src' (resp. 'lhs' or 'rhs') pointers so that the selected
|
||||
// pointer gets aligned, size is decreased accordingly.
|
||||
// - calls the 'NextT' operation.
|
||||
//
|
||||
// e.g. A 16-byte Destination Aligned 32-byte Loop Copy can be written as:
|
||||
// Copy<Align<_16, Arg::Dst>::Then<Loop<_32>>>(dst, src, count);
|
||||
template <typename AlignmentT, Arg AlignOn> struct Align {
|
||||
private:
|
||||
static constexpr size_t Alignment = AlignmentT::kSize;
|
||||
static_assert(Alignment > 1, "Alignment must be more than 1");
|
||||
static_assert(is_power2(Alignment), "Alignment must be a power of 2");
|
||||
|
||||
public:
|
||||
template <typename NextT> struct Then {
|
||||
static void Copy(char *__restrict dst, const char *__restrict src,
|
||||
size_t size) {
|
||||
AlignmentT::Copy(dst, src);
|
||||
internal::AlignHelper<AlignOn, Alignment>::Bump(dst, src, size);
|
||||
NextT::Copy(dst, src, size);
|
||||
}
|
||||
|
||||
static bool Equals(const char *lhs, const char *rhs, size_t size) {
|
||||
if (!AlignmentT::Equals(lhs, rhs))
|
||||
return false;
|
||||
internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
|
||||
return NextT::Equals(lhs, rhs, size);
|
||||
}
|
||||
|
||||
static int ThreeWayCompare(const char *lhs, const char *rhs, size_t size) {
|
||||
if (const int result = AlignmentT::ThreeWayCompare(lhs, rhs))
|
||||
return result;
|
||||
internal::AlignHelper<AlignOn, Alignment>::Bump(lhs, rhs, size);
|
||||
return NextT::ThreeWayCompare(lhs, rhs, size);
|
||||
}
|
||||
|
||||
static void SplatSet(char *dst, const unsigned char value, size_t size) {
|
||||
AlignmentT::SplatSet(dst, value);
|
||||
char *dummy = nullptr;
|
||||
internal::AlignHelper<Arg::_1, Alignment>::Bump(dst, dummy, size);
|
||||
NextT::SplatSet(dst, value, size);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
// Fixed-size Builtin Operations
|
||||
// -----------------------------
|
||||
// Note: Do not use 'builtin' right now as it requires the implementation of the
|
||||
// `_inline` versions of all the builtins. Theoretically, Clang can still turn
|
||||
// them into calls to the C library leading to reentrancy problems.
|
||||
namespace builtin {
|
||||
|
||||
#ifndef __has_builtin
|
||||
#define __has_builtin(x) 0 // Compatibility with non-clang compilers.
|
||||
#endif
|
||||
|
||||
template <size_t Size> struct Builtin {
|
||||
static constexpr size_t kSize = Size;
|
||||
|
||||
static void Copy(char *__restrict dst, const char *__restrict src) {
|
||||
#if LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER
|
||||
ForLoopCopy(dst, src);
|
||||
#elif __has_builtin(__builtin_memcpy_inline)
|
||||
// __builtin_memcpy_inline guarantees to never call external functions.
|
||||
// Unfortunately it is not widely available.
|
||||
__builtin_memcpy_inline(dst, src, kSize);
|
||||
#elif __has_builtin(__builtin_memcpy)
|
||||
__builtin_memcpy(dst, src, kSize);
|
||||
#else
|
||||
ForLoopCopy(dst, src);
|
||||
#endif
|
||||
}
|
||||
|
||||
static bool Equals(const char *lhs, const char *rhs) {
|
||||
return __builtin_memcmp(lhs, rhs, kSize) == 0;
|
||||
}
|
||||
|
||||
static int ThreeWayCompare(const char *lhs, const char *rhs) {
|
||||
return __builtin_memcmp(lhs, rhs, kSize);
|
||||
}
|
||||
|
||||
static void SplatSet(char *dst, const unsigned char value) {
|
||||
__builtin_memset(dst, value, kSize);
|
||||
}
|
||||
|
||||
private:
|
||||
// Copies `kSize` bytes from `src` to `dst` using a for loop.
|
||||
// This code requires the use of `-fno-buitin-memcpy` to prevent the compiler
|
||||
// from turning the for-loop back into `__builtin_memcpy`.
|
||||
static void ForLoopCopy(char *__restrict dst, const char *__restrict src) {
|
||||
for (size_t i = 0; i < kSize; ++i)
|
||||
dst[i] = src[i];
|
||||
}
|
||||
};
|
||||
|
||||
using _1 = Builtin<1>;
|
||||
using _2 = Builtin<2>;
|
||||
using _3 = Builtin<3>;
|
||||
using _4 = Builtin<4>;
|
||||
using _8 = Builtin<8>;
|
||||
using _16 = Builtin<16>;
|
||||
using _32 = Builtin<32>;
|
||||
using _64 = Builtin<64>;
|
||||
using _128 = Builtin<128>;
|
||||
|
||||
} // namespace builtin
|
||||
|
||||
// Fixed-size Scalar Operations
|
||||
// ----------------------------
|
||||
namespace scalar {
|
||||
|
||||
// The Scalar type makes use of simple sized integers.
|
||||
template <typename T> struct Scalar {
|
||||
static constexpr size_t kSize = sizeof(T);
|
||||
|
||||
static void Copy(char *__restrict dst, const char *__restrict src) {
|
||||
Store(dst, Load(src));
|
||||
}
|
||||
|
||||
static bool Equals(const char *lhs, const char *rhs) {
|
||||
return Load(lhs) == Load(rhs);
|
||||
}
|
||||
|
||||
static int ThreeWayCompare(const char *lhs, const char *rhs) {
|
||||
return ScalarThreeWayCompare(Load(lhs), Load(rhs));
|
||||
}
|
||||
|
||||
static void SplatSet(char *dst, const unsigned char value) {
|
||||
Store(dst, GetSplattedValue(value));
|
||||
}
|
||||
|
||||
private:
|
||||
static T Load(const char *ptr) {
|
||||
T value;
|
||||
__builtin_memcpy_inline(&value, ptr, kSize);
|
||||
return value;
|
||||
}
|
||||
static void Store(char *ptr, T value) {
|
||||
__builtin_memcpy_inline(ptr, &value, kSize);
|
||||
}
|
||||
static T GetSplattedValue(const unsigned char value) {
|
||||
return T(~0) / T(0xFF) * T(value);
|
||||
}
|
||||
static int ScalarThreeWayCompare(T a, T b);
|
||||
};
|
||||
|
||||
template <>
|
||||
inline int Scalar<uint8_t>::ScalarThreeWayCompare(uint8_t a, uint8_t b) {
|
||||
const int16_t la = Endian::ToBigEndian(a);
|
||||
const int16_t lb = Endian::ToBigEndian(b);
|
||||
return la - lb;
|
||||
}
|
||||
template <>
|
||||
inline int Scalar<uint16_t>::ScalarThreeWayCompare(uint16_t a, uint16_t b) {
|
||||
const int32_t la = Endian::ToBigEndian(a);
|
||||
const int32_t lb = Endian::ToBigEndian(b);
|
||||
return la - lb;
|
||||
}
|
||||
template <>
|
||||
inline int Scalar<uint32_t>::ScalarThreeWayCompare(uint32_t a, uint32_t b) {
|
||||
const int64_t la = Endian::ToBigEndian(a);
|
||||
const int64_t lb = Endian::ToBigEndian(b);
|
||||
if (la < lb)
|
||||
return -1;
|
||||
if (la > lb)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
template <>
|
||||
inline int Scalar<uint64_t>::ScalarThreeWayCompare(uint64_t a, uint64_t b) {
|
||||
const __int128_t la = Endian::ToBigEndian(a);
|
||||
const __int128_t lb = Endian::ToBigEndian(b);
|
||||
if (la < lb)
|
||||
return -1;
|
||||
if (la > lb)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
using UINT8 = Scalar<uint8_t>; // 1 Byte
|
||||
using UINT16 = Scalar<uint16_t>; // 2 Bytes
|
||||
using UINT32 = Scalar<uint32_t>; // 4 Bytes
|
||||
using UINT64 = Scalar<uint64_t>; // 8 Bytes
|
||||
|
||||
using _1 = UINT8;
|
||||
using _2 = UINT16;
|
||||
using _3 = Chained<UINT16, UINT8>;
|
||||
using _4 = UINT32;
|
||||
using _8 = UINT64;
|
||||
using _16 = Repeated<_8, 2>;
|
||||
using _32 = Repeated<_8, 4>;
|
||||
using _64 = Repeated<_8, 8>;
|
||||
using _128 = Repeated<_8, 16>;
|
||||
|
||||
} // namespace scalar
|
||||
} // namespace __llvm_libc
|
||||
|
||||
#include <src/string/memory_utils/elements_x86.h>
|
||||
|
||||
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_H
|
|
@ -1,166 +0,0 @@
|
|||
//===-- Elementary operations for x86 -------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H
|
||||
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H
|
||||
|
||||
#include <stddef.h> // size_t
|
||||
#include <stdint.h> // uint8_t, uint16_t, uint32_t, uint64_t
|
||||
|
||||
#ifdef __SSE2__
|
||||
#include <immintrin.h>
|
||||
#endif // __SSE2__
|
||||
|
||||
#include "src/string/memory_utils/elements.h" // __llvm_libc::scalar
|
||||
|
||||
// Fixed-size Vector Operations
|
||||
// ----------------------------
|
||||
|
||||
namespace __llvm_libc {
|
||||
namespace x86 {
|
||||
|
||||
#ifdef __SSE2__
|
||||
template <typename Base> struct Vector : public Base {
|
||||
static void Copy(char *dst, const char *src) {
|
||||
Base::Store(dst, Base::Load(src));
|
||||
}
|
||||
|
||||
static bool Equals(const char *a, const char *b) {
|
||||
return Base::NotEqualMask(Base::Load(a), Base::Load(b)) == 0;
|
||||
}
|
||||
|
||||
static int ThreeWayCompare(const char *a, const char *b) {
|
||||
const auto mask = Base::NotEqualMask(Base::Load(a), Base::Load(b));
|
||||
if (!mask)
|
||||
return 0;
|
||||
return CharDiff(a, b, mask);
|
||||
}
|
||||
|
||||
static void SplatSet(char *dst, const unsigned char value) {
|
||||
Base::Store(dst, Base::GetSplattedValue(value));
|
||||
}
|
||||
|
||||
static int CharDiff(const char *a, const char *b, uint64_t mask) {
|
||||
const size_t diff_index = __builtin_ctzl(mask);
|
||||
const int ca = (unsigned char)a[diff_index];
|
||||
const int cb = (unsigned char)b[diff_index];
|
||||
return ca - cb;
|
||||
}
|
||||
};
|
||||
|
||||
struct M128 {
|
||||
static constexpr size_t kSize = 16;
|
||||
using T = char __attribute__((__vector_size__(kSize)));
|
||||
static uint16_t mask(T value) {
|
||||
// NOLINTNEXTLINE(llvmlibc-callee-namespace)
|
||||
return _mm_movemask_epi8(value);
|
||||
}
|
||||
static uint16_t NotEqualMask(T a, T b) { return mask(a != b); }
|
||||
static T Load(const char *ptr) {
|
||||
// NOLINTNEXTLINE(llvmlibc-callee-namespace)
|
||||
return _mm_loadu_si128(reinterpret_cast<__m128i_u const *>(ptr));
|
||||
}
|
||||
static void Store(char *ptr, T value) {
|
||||
// NOLINTNEXTLINE(llvmlibc-callee-namespace)
|
||||
return _mm_storeu_si128(reinterpret_cast<__m128i_u *>(ptr), value);
|
||||
}
|
||||
static T GetSplattedValue(const char v) {
|
||||
const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
|
||||
return splatted;
|
||||
}
|
||||
};
|
||||
|
||||
using Vector128 = Vector<M128>; // 16 Bytes
|
||||
|
||||
#ifdef __AVX2__
|
||||
struct M256 {
|
||||
static constexpr size_t kSize = 32;
|
||||
using T = char __attribute__((__vector_size__(kSize)));
|
||||
static uint32_t mask(T value) {
|
||||
// NOLINTNEXTLINE(llvmlibc-callee-namespace)
|
||||
return _mm256_movemask_epi8(value);
|
||||
}
|
||||
static uint32_t NotEqualMask(T a, T b) { return mask(a != b); }
|
||||
static T Load(const char *ptr) {
|
||||
// NOLINTNEXTLINE(llvmlibc-callee-namespace)
|
||||
return _mm256_loadu_si256(reinterpret_cast<__m256i const *>(ptr));
|
||||
}
|
||||
static void Store(char *ptr, T value) {
|
||||
// NOLINTNEXTLINE(llvmlibc-callee-namespace)
|
||||
return _mm256_storeu_si256(reinterpret_cast<__m256i *>(ptr), value);
|
||||
}
|
||||
static T GetSplattedValue(const char v) {
|
||||
const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
|
||||
v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
|
||||
return splatted;
|
||||
}
|
||||
};
|
||||
|
||||
using Vector256 = Vector<M256>; // 32 Bytes
|
||||
|
||||
#if defined(__AVX512F__) and defined(__AVX512BW__)
|
||||
struct M512 {
|
||||
static constexpr size_t kSize = 64;
|
||||
using T = char __attribute__((__vector_size__(kSize)));
|
||||
static uint64_t NotEqualMask(T a, T b) {
|
||||
// NOLINTNEXTLINE(llvmlibc-callee-namespace)
|
||||
return _mm512_cmpneq_epi8_mask(a, b);
|
||||
}
|
||||
static T Load(const char *ptr) {
|
||||
// NOLINTNEXTLINE(llvmlibc-callee-namespace)
|
||||
return _mm512_loadu_epi8(ptr);
|
||||
}
|
||||
static void Store(char *ptr, T value) {
|
||||
// NOLINTNEXTLINE(llvmlibc-callee-namespace)
|
||||
return _mm512_storeu_epi8(ptr, value);
|
||||
}
|
||||
static T GetSplattedValue(const char v) {
|
||||
const T splatted = {v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
|
||||
v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
|
||||
v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v,
|
||||
v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
|
||||
return splatted;
|
||||
}
|
||||
};
|
||||
using Vector512 = Vector<M512>;
|
||||
|
||||
#endif // defined(__AVX512F__) and defined(__AVX512BW__)
|
||||
#endif // __AVX2__
|
||||
#endif // __SSE2__
|
||||
|
||||
using _1 = __llvm_libc::scalar::_1;
|
||||
using _2 = __llvm_libc::scalar::_2;
|
||||
using _3 = __llvm_libc::scalar::_3;
|
||||
using _4 = __llvm_libc::scalar::_4;
|
||||
using _8 = __llvm_libc::scalar::_8;
|
||||
#if defined(__AVX512F__) && defined(__AVX512BW__)
|
||||
using _16 = __llvm_libc::x86::Vector128;
|
||||
using _32 = __llvm_libc::x86::Vector256;
|
||||
using _64 = __llvm_libc::x86::Vector512;
|
||||
using _128 = __llvm_libc::Repeated<_64, 2>;
|
||||
#elif defined(__AVX2__)
|
||||
using _16 = __llvm_libc::x86::Vector128;
|
||||
using _32 = __llvm_libc::x86::Vector256;
|
||||
using _64 = __llvm_libc::Repeated<_32, 2>;
|
||||
using _128 = __llvm_libc::Repeated<_32, 4>;
|
||||
#elif defined(__SSE2__)
|
||||
using _16 = __llvm_libc::x86::Vector128;
|
||||
using _32 = __llvm_libc::Repeated<_16, 2>;
|
||||
using _64 = __llvm_libc::Repeated<_16, 4>;
|
||||
using _128 = __llvm_libc::Repeated<_16, 8>;
|
||||
#else
|
||||
using _16 = __llvm_libc::Repeated<_8, 2>;
|
||||
using _32 = __llvm_libc::Repeated<_8, 4>;
|
||||
using _64 = __llvm_libc::Repeated<_8, 8>;
|
||||
using _128 = __llvm_libc::Repeated<_8, 16>;
|
||||
#endif
|
||||
|
||||
} // namespace x86
|
||||
} // namespace __llvm_libc
|
||||
|
||||
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_ELEMENTS_X86_H
|
|
@ -0,0 +1,140 @@
|
|||
//===-- Memcpy utils --------------------------------------------*- C++ -*-===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_UTILS_H
|
||||
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_UTILS_H
|
||||
|
||||
#include "src/__support/sanitizer.h"
|
||||
#include "src/string/memory_utils/utils.h"
|
||||
#include <stddef.h> // size_t
|
||||
|
||||
// __builtin_memcpy_inline guarantees to never call external functions.
|
||||
// Unfortunately it is not widely available.
|
||||
#ifdef __clang__
|
||||
#if __has_builtin(__builtin_memcpy_inline)
|
||||
#define USE_BUILTIN_MEMCPY_INLINE
|
||||
#endif
|
||||
#elif defined(__GNUC__)
|
||||
#define USE_BUILTIN_MEMCPY
|
||||
#endif
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
// This is useful for testing.
|
||||
#if defined(LLVM_LIBC_MEMCPY_MONITOR)
|
||||
extern "C" void LLVM_LIBC_MEMCPY_MONITOR(char *__restrict,
|
||||
const char *__restrict, size_t);
|
||||
#endif
|
||||
|
||||
// Copies `kBlockSize` bytes from `src` to `dst` using a for loop.
|
||||
// This code requires the use of `-fno-buitin-memcpy` to prevent the compiler
|
||||
// from turning the for-loop back into `__builtin_memcpy`.
|
||||
template <size_t kBlockSize>
|
||||
static void ForLoopCopy(char *__restrict dst, const char *__restrict src) {
|
||||
for (size_t i = 0; i < kBlockSize; ++i)
|
||||
dst[i] = src[i];
|
||||
}
|
||||
|
||||
// Copies `kBlockSize` bytes from `src` to `dst`.
|
||||
template <size_t kBlockSize>
|
||||
static void CopyBlock(char *__restrict dst, const char *__restrict src) {
|
||||
#if defined(LLVM_LIBC_MEMCPY_MONITOR)
|
||||
LLVM_LIBC_MEMCPY_MONITOR(dst, src, kBlockSize);
|
||||
#elif LLVM_LIBC_HAVE_MEMORY_SANITIZER || LLVM_LIBC_HAVE_ADDRESS_SANITIZER
|
||||
ForLoopCopy<kBlockSize>(dst, src);
|
||||
#elif defined(USE_BUILTIN_MEMCPY_INLINE)
|
||||
__builtin_memcpy_inline(dst, src, kBlockSize);
|
||||
#elif defined(USE_BUILTIN_MEMCPY)
|
||||
__builtin_memcpy(dst, src, kBlockSize);
|
||||
#else
|
||||
ForLoopCopy<kBlockSize>(dst, src);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Copies `kBlockSize` bytes from `src + count - kBlockSize` to
|
||||
// `dst + count - kBlockSize`.
|
||||
// Precondition: `count >= kBlockSize`.
|
||||
template <size_t kBlockSize>
|
||||
static void CopyLastBlock(char *__restrict dst, const char *__restrict src,
|
||||
size_t count) {
|
||||
const size_t offset = count - kBlockSize;
|
||||
CopyBlock<kBlockSize>(dst + offset, src + offset);
|
||||
}
|
||||
|
||||
// Copies `kBlockSize` bytes twice with an overlap between the two.
|
||||
//
|
||||
// [1234567812345678123]
|
||||
// [__XXXXXXXXXXXXXX___]
|
||||
// [__XXXXXXXX_________]
|
||||
// [________XXXXXXXX___]
|
||||
//
|
||||
// Precondition: `count >= kBlockSize && count <= kBlockSize`.
|
||||
template <size_t kBlockSize>
|
||||
static void CopyBlockOverlap(char *__restrict dst, const char *__restrict src,
|
||||
size_t count) {
|
||||
CopyBlock<kBlockSize>(dst, src);
|
||||
CopyLastBlock<kBlockSize>(dst, src, count);
|
||||
}
|
||||
|
||||
// Copies `count` bytes by blocks of `kBlockSize` bytes.
|
||||
// Copies at the start and end of the buffer are unaligned.
|
||||
// Copies in the middle of the buffer are aligned to `kAlignment`.
|
||||
//
|
||||
// e.g. with
|
||||
// [12345678123456781234567812345678]
|
||||
// [__XXXXXXXXXXXXXXXXXXXXXXXXXXXX___]
|
||||
// [__XXXX___________________________]
|
||||
// [_____XXXXXXXX____________________]
|
||||
// [_____________XXXXXXXX____________]
|
||||
// [_____________________XXXXXXXX____]
|
||||
// [______________________XXXXXXXX___]
|
||||
//
|
||||
// Precondition: `kAlignment <= kBlockSize`
|
||||
// `count > 2 * kBlockSize` for efficiency.
|
||||
// `count >= kAlignment` for correctness.
|
||||
template <size_t kBlockSize, size_t kAlignment = kBlockSize>
|
||||
static void CopySrcAlignedBlocks(char *__restrict dst,
|
||||
const char *__restrict src, size_t count) {
|
||||
static_assert(is_power2(kAlignment), "kAlignment must be a power of two");
|
||||
static_assert(is_power2(kBlockSize), "kBlockSize must be a power of two");
|
||||
static_assert(kAlignment <= kBlockSize,
|
||||
"kAlignment must be less or equal to block size");
|
||||
CopyBlock<kAlignment>(dst, src); // Copy first block
|
||||
|
||||
// Copy aligned blocks
|
||||
const size_t ofla = offset_from_last_aligned<kAlignment>(src);
|
||||
const size_t limit = count + ofla - kBlockSize;
|
||||
for (size_t offset = kAlignment; offset < limit; offset += kBlockSize)
|
||||
CopyBlock<kBlockSize>(dst - ofla + offset,
|
||||
assume_aligned<kAlignment>(src - ofla + offset));
|
||||
|
||||
CopyLastBlock<kBlockSize>(dst, src, count); // Copy last block
|
||||
}
|
||||
|
||||
template <size_t kBlockSize, size_t kAlignment = kBlockSize>
|
||||
static void CopyDstAlignedBlocks(char *__restrict dst,
|
||||
const char *__restrict src, size_t count) {
|
||||
static_assert(is_power2(kAlignment), "kAlignment must be a power of two");
|
||||
static_assert(is_power2(kBlockSize), "kBlockSize must be a power of two");
|
||||
static_assert(kAlignment <= kBlockSize,
|
||||
"kAlignment must be less or equal to block size");
|
||||
CopyBlock<kAlignment>(dst, src); // Copy first block
|
||||
|
||||
// Copy aligned blocks
|
||||
const size_t ofla = offset_from_last_aligned<kAlignment>(dst);
|
||||
const size_t limit = count + ofla - kBlockSize;
|
||||
for (size_t offset = kAlignment; offset < limit; offset += kBlockSize)
|
||||
CopyBlock<kBlockSize>(assume_aligned<kAlignment>(dst - ofla + offset),
|
||||
src - ofla + offset);
|
||||
|
||||
CopyLastBlock<kBlockSize>(dst, src, count); // Copy last block
|
||||
}
|
||||
|
||||
} // namespace __llvm_libc
|
||||
|
||||
#endif // LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_UTILS_H
|
|
@ -6,16 +6,70 @@
|
|||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H
|
||||
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H
|
||||
#ifndef LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H
|
||||
#define LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H
|
||||
|
||||
#include "src/string/memory_utils/elements.h"
|
||||
#include "src/string/memory_utils/utils.h"
|
||||
|
||||
#include <stddef.h> // size_t
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
// Sets `kBlockSize` bytes starting from `src` to `value`.
|
||||
template <size_t kBlockSize> static void SetBlock(char *dst, unsigned value) {
|
||||
// Theoretically the compiler is allowed to call memset here and end up with a
|
||||
// recursive call, practically it doesn't happen, however this should be
|
||||
// replaced with a __builtin_memset_inline once it's available in clang.
|
||||
__builtin_memset(dst, value, kBlockSize);
|
||||
}
|
||||
|
||||
// Sets `kBlockSize` bytes from `src + count - kBlockSize` to `value`.
|
||||
// Precondition: `count >= kBlockSize`.
|
||||
template <size_t kBlockSize>
|
||||
static void SetLastBlock(char *dst, unsigned value, size_t count) {
|
||||
SetBlock<kBlockSize>(dst + count - kBlockSize, value);
|
||||
}
|
||||
|
||||
// Sets `kBlockSize` bytes twice with an overlap between the two.
|
||||
//
|
||||
// [1234567812345678123]
|
||||
// [__XXXXXXXXXXXXXX___]
|
||||
// [__XXXXXXXX_________]
|
||||
// [________XXXXXXXX___]
|
||||
//
|
||||
// Precondition: `count >= kBlockSize && count <= kBlockSize`.
|
||||
template <size_t kBlockSize>
|
||||
static void SetBlockOverlap(char *dst, unsigned value, size_t count) {
|
||||
SetBlock<kBlockSize>(dst, value);
|
||||
SetLastBlock<kBlockSize>(dst, value, count);
|
||||
}
|
||||
|
||||
// Sets `count` bytes by blocks of `kBlockSize` bytes.
|
||||
// Sets at the start and end of the buffer are unaligned.
|
||||
// Sets in the middle of the buffer are aligned to `kBlockSize`.
|
||||
//
|
||||
// e.g. with
|
||||
// [12345678123456781234567812345678]
|
||||
// [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___]
|
||||
// [__XXXXXXXX______________________]
|
||||
// [________XXXXXXXX________________]
|
||||
// [________________XXXXXXXX________]
|
||||
// [_____________________XXXXXXXX___]
|
||||
//
|
||||
// Precondition: `count > 2 * kBlockSize` for efficiency.
|
||||
// `count >= kBlockSize` for correctness.
|
||||
template <size_t kBlockSize>
|
||||
static void SetAlignedBlocks(char *dst, unsigned value, size_t count) {
|
||||
SetBlock<kBlockSize>(dst, value); // Set first block
|
||||
|
||||
// Set aligned blocks
|
||||
size_t offset = kBlockSize - offset_from_last_aligned<kBlockSize>(dst);
|
||||
for (; offset + kBlockSize < count; offset += kBlockSize)
|
||||
SetBlock<kBlockSize>(dst + offset, value);
|
||||
|
||||
SetLastBlock<kBlockSize>(dst, value, count); // Set last block
|
||||
}
|
||||
|
||||
// A general purpose implementation assuming cheap unaligned writes for sizes:
|
||||
// 1, 2, 4, 8, 16, 32 and 64 Bytes. Note that some architecture can't store 32
|
||||
// or 64 Bytes at a time, the compiler will expand them as needed.
|
||||
|
@ -52,27 +106,26 @@ inline static void GeneralPurposeMemset(char *dst, unsigned char value,
|
|||
if (count == 0)
|
||||
return;
|
||||
if (count == 1)
|
||||
return SplatSet<scalar::_1>(dst, value);
|
||||
return SetBlock<1>(dst, value);
|
||||
if (count == 2)
|
||||
return SplatSet<scalar::_2>(dst, value);
|
||||
return SetBlock<2>(dst, value);
|
||||
if (count == 3)
|
||||
return SplatSet<scalar::_3>(dst, value);
|
||||
return SetBlock<3>(dst, value);
|
||||
if (count == 4)
|
||||
return SplatSet<scalar::_4>(dst, value);
|
||||
return SetBlock<4>(dst, value);
|
||||
if (count <= 8)
|
||||
return SplatSet<HeadTail<scalar::_4>>(dst, value, count);
|
||||
return SetBlockOverlap<4>(dst, value, count);
|
||||
if (count <= 16)
|
||||
return SplatSet<HeadTail<scalar::_8>>(dst, value, count);
|
||||
return SetBlockOverlap<8>(dst, value, count);
|
||||
if (count <= 32)
|
||||
return SplatSet<HeadTail<scalar::_16>>(dst, value, count);
|
||||
return SetBlockOverlap<16>(dst, value, count);
|
||||
if (count <= 64)
|
||||
return SplatSet<HeadTail<scalar::_32>>(dst, value, count);
|
||||
return SetBlockOverlap<32>(dst, value, count);
|
||||
if (count <= 128)
|
||||
return SplatSet<HeadTail<scalar::_64>>(dst, value, count);
|
||||
return SplatSet<Align<scalar::_32, Arg::Dst>::Then<Loop<scalar::_32>>>(
|
||||
dst, value, count);
|
||||
return SetBlockOverlap<64>(dst, value, count);
|
||||
return SetAlignedBlocks<32>(dst, value, count);
|
||||
}
|
||||
|
||||
} // namespace __llvm_libc
|
||||
|
||||
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H
|
||||
#endif // LIBC_SRC_STRING_MEMORY_UTILS_MEMSET_UTILS_H
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
|
||||
#include "src/string/memcpy.h"
|
||||
#include "src/__support/common.h"
|
||||
#include "src/string/memory_utils/elements.h"
|
||||
#include "src/string/memory_utils/memcpy_utils.h"
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
|
@ -29,11 +29,8 @@ constexpr size_t kRepMovsBSize =
|
|||
// Whether target supports AVX instructions.
|
||||
constexpr bool kHasAvx = LLVM_LIBC_IS_DEFINED(__AVX__);
|
||||
|
||||
#ifdef __AVX__
|
||||
using LoopBlockSize = __llvm_libc::x86::_64;
|
||||
#else
|
||||
using LoopBlockSize = __llvm_libc::x86::_32;
|
||||
#endif
|
||||
// The chunk size used for the loop copy strategy.
|
||||
constexpr size_t kLoopCopyBlockSize = kHasAvx ? 64 : 32;
|
||||
|
||||
static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
|
||||
size_t count) {
|
||||
|
@ -64,37 +61,33 @@ static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
|
|||
// with little change on the code side.
|
||||
static void memcpy_x86(char *__restrict dst, const char *__restrict src,
|
||||
size_t count) {
|
||||
// Use x86 strategies (_1, _2, _3 ...)
|
||||
using namespace __llvm_libc::x86;
|
||||
|
||||
if (kUseOnlyRepMovsb)
|
||||
return CopyRepMovsb(dst, src, count);
|
||||
|
||||
if (count == 0)
|
||||
return;
|
||||
if (count == 1)
|
||||
return Copy<_1>(dst, src);
|
||||
return CopyBlock<1>(dst, src);
|
||||
if (count == 2)
|
||||
return Copy<_2>(dst, src);
|
||||
return CopyBlock<2>(dst, src);
|
||||
if (count == 3)
|
||||
return Copy<_3>(dst, src);
|
||||
return CopyBlock<3>(dst, src);
|
||||
if (count == 4)
|
||||
return Copy<_4>(dst, src);
|
||||
return CopyBlock<4>(dst, src);
|
||||
if (count < 8)
|
||||
return Copy<HeadTail<_4>>(dst, src, count);
|
||||
return CopyBlockOverlap<4>(dst, src, count);
|
||||
if (count < 16)
|
||||
return Copy<HeadTail<_8>>(dst, src, count);
|
||||
return CopyBlockOverlap<8>(dst, src, count);
|
||||
if (count < 32)
|
||||
return Copy<HeadTail<_16>>(dst, src, count);
|
||||
return CopyBlockOverlap<16>(dst, src, count);
|
||||
if (count < 64)
|
||||
return Copy<HeadTail<_32>>(dst, src, count);
|
||||
return CopyBlockOverlap<32>(dst, src, count);
|
||||
if (count < 128)
|
||||
return Copy<HeadTail<_64>>(dst, src, count);
|
||||
return CopyBlockOverlap<64>(dst, src, count);
|
||||
if (kHasAvx && count < 256)
|
||||
return Copy<HeadTail<_128>>(dst, src, count);
|
||||
return CopyBlockOverlap<128>(dst, src, count);
|
||||
if (count <= kRepMovsBSize)
|
||||
return Copy<Align<_32, Arg::Dst>::Then<Loop<LoopBlockSize>>>(dst, src,
|
||||
count);
|
||||
return CopyDstAlignedBlocks<kLoopCopyBlockSize, 32>(dst, src, count);
|
||||
return CopyRepMovsb(dst, src, count);
|
||||
}
|
||||
|
||||
|
|
|
@ -3,13 +3,15 @@ add_libc_unittest(
|
|||
SUITE
|
||||
libc_string_unittests
|
||||
SRCS
|
||||
elements_test.cpp
|
||||
memory_access_test.cpp
|
||||
utils_test.cpp
|
||||
memcpy_utils_test.cpp
|
||||
DEPENDS
|
||||
libc.src.string.memory_utils.memory_utils
|
||||
libc.utils.CPP.standalone_cpp
|
||||
COMPILE_OPTIONS
|
||||
${LIBC_COMPILE_OPTIONS_NATIVE}
|
||||
-ffreestanding
|
||||
)
|
||||
)
|
||||
|
||||
target_compile_definitions(
|
||||
libc.test.src.string.memory_utils.utils_test
|
||||
PRIVATE
|
||||
LLVM_LIBC_MEMCPY_MONITOR=memcpy_monitor
|
||||
)
|
||||
|
|
|
@ -1,103 +0,0 @@
|
|||
//===-- Unittests for memory_utils ----------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/string/memory_utils/elements.h"
|
||||
#include "utils/CPP/Array.h"
|
||||
#include "utils/UnitTest/Test.h"
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
// Registering Types
|
||||
using FixedSizeTypes = testing::TypeList<
|
||||
#ifdef __SSE2__
|
||||
x86::Vector128, //
|
||||
#endif // __SSE2__
|
||||
#ifdef __AVX2__
|
||||
x86::Vector256, //
|
||||
#endif // __AVX2__
|
||||
#if defined(__AVX512F__) and defined(__AVX512BW__)
|
||||
x86::Vector512, //
|
||||
#endif // defined(__AVX512F__) and defined(__AVX512BW__)
|
||||
scalar::UINT8, //
|
||||
scalar::UINT16, //
|
||||
scalar::UINT32, //
|
||||
scalar::UINT64, //
|
||||
Repeated<scalar::UINT64, 2>, //
|
||||
Repeated<scalar::UINT64, 4>, //
|
||||
Repeated<scalar::UINT64, 8>, //
|
||||
Repeated<scalar::UINT64, 16>, //
|
||||
Repeated<scalar::UINT64, 32>, //
|
||||
Chained<scalar::UINT16, scalar::UINT8>, //
|
||||
Chained<scalar::UINT32, scalar::UINT16, scalar::UINT8>, //
|
||||
builtin::_1, //
|
||||
builtin::_2, //
|
||||
builtin::_3, //
|
||||
builtin::_4, //
|
||||
builtin::_8 //
|
||||
>;
|
||||
|
||||
char GetRandomChar() {
|
||||
static constexpr const uint64_t a = 1103515245;
|
||||
static constexpr const uint64_t c = 12345;
|
||||
static constexpr const uint64_t m = 1ULL << 31;
|
||||
static uint64_t seed = 123456789;
|
||||
seed = (a * seed + c) % m;
|
||||
return seed;
|
||||
}
|
||||
|
||||
template <typename Element> using Buffer = cpp::Array<char, Element::kSize>;
|
||||
template <typename Element> Buffer<Element> GetRandomBuffer() {
|
||||
Buffer<Element> buffer;
|
||||
for (auto ¤t : buffer)
|
||||
current = GetRandomChar();
|
||||
return buffer;
|
||||
}
|
||||
|
||||
TYPED_TEST(LlvmLibcMemoryElements, Copy, FixedSizeTypes) {
|
||||
Buffer<ParamType> Dst;
|
||||
const auto buffer = GetRandomBuffer<ParamType>();
|
||||
Copy<ParamType>(Dst.data(), buffer.data());
|
||||
for (size_t i = 0; i < ParamType::kSize; ++i)
|
||||
EXPECT_EQ(Dst[i], buffer[i]);
|
||||
}
|
||||
|
||||
TYPED_TEST(LlvmLibcMemoryElements, Equals, FixedSizeTypes) {
|
||||
const auto buffer = GetRandomBuffer<ParamType>();
|
||||
EXPECT_TRUE(Equals<ParamType>(buffer.data(), buffer.data()));
|
||||
}
|
||||
|
||||
TYPED_TEST(LlvmLibcMemoryElements, ThreeWayCompare, FixedSizeTypes) {
|
||||
Buffer<ParamType> initial;
|
||||
for (auto &c : initial)
|
||||
c = 5;
|
||||
|
||||
// Testing equality
|
||||
EXPECT_EQ(ThreeWayCompare<ParamType>(initial.data(), initial.data()), 0);
|
||||
|
||||
// Testing all mismatching positions
|
||||
for (size_t i = 0; i < ParamType::kSize; ++i) {
|
||||
auto copy = initial;
|
||||
++copy[i]; // Copy is now lexicographycally greated than initial
|
||||
const auto *less = initial.data();
|
||||
const auto *greater = copy.data();
|
||||
EXPECT_LT(ThreeWayCompare<ParamType>(less, greater), 0);
|
||||
EXPECT_GT(ThreeWayCompare<ParamType>(greater, less), 0);
|
||||
}
|
||||
}
|
||||
|
||||
TYPED_TEST(LlvmLibcMemoryElements, Splat, FixedSizeTypes) {
|
||||
Buffer<ParamType> Dst;
|
||||
const cpp::Array<char, 3> values = {char(0x00), char(0x7F), char(0xFF)};
|
||||
for (char value : values) {
|
||||
SplatSet<ParamType>(Dst.data(), value);
|
||||
for (size_t i = 0; i < ParamType::kSize; ++i)
|
||||
EXPECT_EQ(Dst[i], value);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace __llvm_libc
|
|
@ -0,0 +1,336 @@
|
|||
//===-- Unittests for memory_utils ----------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "src/string/memory_utils/memcpy_utils.h"
|
||||
#include "utils/CPP/Array.h"
|
||||
#include "utils/UnitTest/Test.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h> // uintptr_t
|
||||
|
||||
#ifndef LLVM_LIBC_MEMCPY_MONITOR
|
||||
#error LLVM_LIBC_MEMCPY_MONITOR must be defined for this test.
|
||||
#endif
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
struct Buffer {
|
||||
static constexpr size_t kMaxBuffer = 1024;
|
||||
char buffer[kMaxBuffer + 1];
|
||||
size_t last = 0;
|
||||
|
||||
void Clear() {
|
||||
last = 0;
|
||||
for (size_t i = 0; i < kMaxBuffer; ++i)
|
||||
buffer[i] = '0';
|
||||
buffer[kMaxBuffer] = '\0';
|
||||
}
|
||||
|
||||
void Increment(const void *ptr) {
|
||||
const auto offset = reinterpret_cast<uintptr_t>(ptr);
|
||||
assert(offset < kMaxBuffer);
|
||||
++buffer[offset];
|
||||
if (offset > last)
|
||||
last = offset;
|
||||
}
|
||||
|
||||
char *Finish() {
|
||||
assert(last < kMaxBuffer);
|
||||
buffer[last + 1] = '\0';
|
||||
return buffer;
|
||||
}
|
||||
};
|
||||
|
||||
struct Trace {
|
||||
Buffer read;
|
||||
Buffer write;
|
||||
|
||||
void Add(char *__restrict dst, const char *__restrict src, size_t count) {
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
read.Increment(src + i);
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
write.Increment(dst + i);
|
||||
}
|
||||
|
||||
void Clear() {
|
||||
read.Clear();
|
||||
write.Clear();
|
||||
}
|
||||
|
||||
char *Read() { return read.Finish(); }
|
||||
char *Write() { return write.Finish(); }
|
||||
};
|
||||
|
||||
static Trace &GetTrace() {
|
||||
static thread_local Trace events;
|
||||
return events;
|
||||
}
|
||||
|
||||
extern "C" void LLVM_LIBC_MEMCPY_MONITOR(char *__restrict dst,
|
||||
const char *__restrict src,
|
||||
size_t count) {
|
||||
GetTrace().Add(dst, src, count);
|
||||
}
|
||||
|
||||
char *I(uintptr_t offset) { return reinterpret_cast<char *>(offset); }
|
||||
|
||||
TEST(LlvmLibcMemcpyUtilsTest, CopyTrivial) {
|
||||
auto &trace = GetTrace();
|
||||
|
||||
trace.Clear();
|
||||
CopyBlock<1>(I(0), I(0));
|
||||
EXPECT_STREQ(trace.Write(), "1");
|
||||
EXPECT_STREQ(trace.Read(), "1");
|
||||
|
||||
trace.Clear();
|
||||
CopyBlock<2>(I(0), I(0));
|
||||
EXPECT_STREQ(trace.Write(), "11");
|
||||
EXPECT_STREQ(trace.Read(), "11");
|
||||
|
||||
trace.Clear();
|
||||
CopyBlock<4>(I(0), I(0));
|
||||
EXPECT_STREQ(trace.Write(), "1111");
|
||||
EXPECT_STREQ(trace.Read(), "1111");
|
||||
|
||||
trace.Clear();
|
||||
CopyBlock<8>(I(0), I(0));
|
||||
EXPECT_STREQ(trace.Write(), "11111111");
|
||||
EXPECT_STREQ(trace.Read(), "11111111");
|
||||
|
||||
trace.Clear();
|
||||
CopyBlock<16>(I(0), I(0));
|
||||
EXPECT_STREQ(trace.Write(), "1111111111111111");
|
||||
EXPECT_STREQ(trace.Read(), "1111111111111111");
|
||||
|
||||
trace.Clear();
|
||||
CopyBlock<32>(I(0), I(0));
|
||||
EXPECT_STREQ(trace.Write(), "11111111111111111111111111111111");
|
||||
EXPECT_STREQ(trace.Read(), "11111111111111111111111111111111");
|
||||
|
||||
trace.Clear();
|
||||
CopyBlock<64>(I(0), I(0));
|
||||
EXPECT_STREQ(
|
||||
trace.Write(),
|
||||
"1111111111111111111111111111111111111111111111111111111111111111");
|
||||
EXPECT_STREQ(
|
||||
trace.Read(),
|
||||
"1111111111111111111111111111111111111111111111111111111111111111");
|
||||
}
|
||||
|
||||
TEST(LlvmLibcMemcpyUtilsTest, CopyOffset) {
|
||||
auto &trace = GetTrace();
|
||||
|
||||
trace.Clear();
|
||||
CopyBlock<1>(I(3), I(1));
|
||||
EXPECT_STREQ(trace.Write(), "0001");
|
||||
EXPECT_STREQ(trace.Read(), "01");
|
||||
|
||||
trace.Clear();
|
||||
CopyBlock<1>(I(2), I(1));
|
||||
EXPECT_STREQ(trace.Write(), "001");
|
||||
EXPECT_STREQ(trace.Read(), "01");
|
||||
}
|
||||
|
||||
TEST(LlvmLibcMemcpyUtilsTest, CopyBlockOverlap) {
|
||||
auto &trace = GetTrace();
|
||||
|
||||
trace.Clear();
|
||||
CopyBlockOverlap<2>(I(0), I(0), 2);
|
||||
EXPECT_STREQ(trace.Write(), "22");
|
||||
EXPECT_STREQ(trace.Read(), "22");
|
||||
|
||||
trace.Clear();
|
||||
CopyBlockOverlap<2>(I(0), I(0), 3);
|
||||
EXPECT_STREQ(trace.Write(), "121");
|
||||
EXPECT_STREQ(trace.Read(), "121");
|
||||
|
||||
trace.Clear();
|
||||
CopyBlockOverlap<2>(I(0), I(0), 4);
|
||||
EXPECT_STREQ(trace.Write(), "1111");
|
||||
EXPECT_STREQ(trace.Read(), "1111");
|
||||
|
||||
trace.Clear();
|
||||
CopyBlockOverlap<4>(I(2), I(1), 7);
|
||||
EXPECT_STREQ(trace.Write(), "001112111");
|
||||
EXPECT_STREQ(trace.Read(), "01112111");
|
||||
}
|
||||
|
||||
TEST(LlvmLibcMemcpyUtilsTest, CopySrcAlignedBlocks) {
|
||||
auto &trace = GetTrace();
|
||||
// Source is aligned and multiple of alignment.
|
||||
// "1111"
|
||||
trace.Clear();
|
||||
CopySrcAlignedBlocks<4>(I(0), I(0), 4);
|
||||
EXPECT_STREQ(trace.Write(), "2222");
|
||||
EXPECT_STREQ(trace.Read(), "2222");
|
||||
|
||||
// Source is aligned and multiple of alignment.
|
||||
// "11110000"
|
||||
// + "00001111"
|
||||
// = "11111111"
|
||||
trace.Clear();
|
||||
CopySrcAlignedBlocks<4>(I(0), I(0), 8);
|
||||
EXPECT_STREQ(trace.Write(), "11111111");
|
||||
EXPECT_STREQ(trace.Read(), "11111111");
|
||||
|
||||
// Source is aligned already overlap at end.
|
||||
// "1111000000000"
|
||||
// + "0000111100000"
|
||||
// + "0000000011110"
|
||||
// + "0000000001111"
|
||||
// = "1111111112221"
|
||||
trace.Clear();
|
||||
CopySrcAlignedBlocks<4>(I(0), I(0), 13);
|
||||
EXPECT_STREQ(trace.Write(), "1111111112221");
|
||||
EXPECT_STREQ(trace.Read(), "1111111112221");
|
||||
|
||||
// Misaligned source.
|
||||
// "01111000000000"
|
||||
// + "00001111000000"
|
||||
// + "00000000111100"
|
||||
// + "00000000001111"
|
||||
// = "01112111112211"
|
||||
trace.Clear();
|
||||
CopySrcAlignedBlocks<4>(I(0), I(1), 13);
|
||||
EXPECT_STREQ(trace.Write(), "1112111112211");
|
||||
EXPECT_STREQ(trace.Read(), "01112111112211");
|
||||
|
||||
// Misaligned source aligned at end.
|
||||
// "011110000000"
|
||||
// + "000011110000"
|
||||
// + "000000001111"
|
||||
// = "011121111111"
|
||||
trace.Clear();
|
||||
CopySrcAlignedBlocks<4>(I(0), I(1), 11);
|
||||
EXPECT_STREQ(trace.Write(), "11121111111");
|
||||
EXPECT_STREQ(trace.Read(), "011121111111");
|
||||
}
|
||||
|
||||
TEST(LlvmLibcMemcpyUtilsTest, CopyDstAlignedBlocks) {
|
||||
auto &trace = GetTrace();
|
||||
// Destination is aligned and multiple of alignment.
|
||||
// "1111"
|
||||
trace.Clear();
|
||||
CopyDstAlignedBlocks<4>(I(0), I(0), 4);
|
||||
EXPECT_STREQ(trace.Write(), "2222");
|
||||
EXPECT_STREQ(trace.Read(), "2222");
|
||||
|
||||
// Destination is aligned and multiple of alignment.
|
||||
// "11110000"
|
||||
// + "00001111"
|
||||
// = "11111111"
|
||||
trace.Clear();
|
||||
CopyDstAlignedBlocks<4>(I(0), I(0), 8);
|
||||
EXPECT_STREQ(trace.Write(), "11111111");
|
||||
EXPECT_STREQ(trace.Read(), "11111111");
|
||||
|
||||
// Destination is aligned already overlap at end.
|
||||
// "1111000000000"
|
||||
// + "0000111100000"
|
||||
// + "0000000011110"
|
||||
// + "0000000001111"
|
||||
// = "1111111112221"
|
||||
trace.Clear();
|
||||
CopyDstAlignedBlocks<4>(I(0), I(0), 13);
|
||||
EXPECT_STREQ(trace.Write(), "1111111112221");
|
||||
EXPECT_STREQ(trace.Read(), "1111111112221");
|
||||
|
||||
// Misaligned destination.
|
||||
// "01111000000000"
|
||||
// + "00001111000000"
|
||||
// + "00000000111100"
|
||||
// + "00000000001111"
|
||||
// = "01112111112211"
|
||||
trace.Clear();
|
||||
CopyDstAlignedBlocks<4>(I(1), I(0), 13);
|
||||
EXPECT_STREQ(trace.Write(), "01112111112211");
|
||||
EXPECT_STREQ(trace.Read(), "1112111112211");
|
||||
|
||||
// Misaligned destination aligned at end.
|
||||
// "011110000000"
|
||||
// + "000011110000"
|
||||
// + "000000001111"
|
||||
// = "011121111111"
|
||||
trace.Clear();
|
||||
CopyDstAlignedBlocks<4>(I(1), I(0), 11);
|
||||
EXPECT_STREQ(trace.Write(), "011121111111");
|
||||
EXPECT_STREQ(trace.Read(), "11121111111");
|
||||
}
|
||||
|
||||
TEST(LlvmLibcMemcpyUtilsTest, CopyAlignedBlocksWithAlignment) {
|
||||
auto &trace = GetTrace();
|
||||
// Source is aligned and multiple of alignment.
|
||||
// "11111111"
|
||||
trace.Clear();
|
||||
CopySrcAlignedBlocks<8, 4>(I(0), I(0), 8);
|
||||
EXPECT_STREQ(trace.Write(), "22221111");
|
||||
EXPECT_STREQ(trace.Read(), "22221111");
|
||||
|
||||
// Destination is aligned and multiple of alignment.
|
||||
// "11111111"
|
||||
trace.Clear();
|
||||
CopyDstAlignedBlocks<8, 4>(I(0), I(0), 8);
|
||||
EXPECT_STREQ(trace.Write(), "22221111");
|
||||
EXPECT_STREQ(trace.Read(), "22221111");
|
||||
|
||||
// Source is aligned and multiple of alignment.
|
||||
// "111111111"
|
||||
trace.Clear();
|
||||
CopySrcAlignedBlocks<8, 4>(I(0), I(0), 9);
|
||||
EXPECT_STREQ(trace.Write(), "122211111");
|
||||
EXPECT_STREQ(trace.Read(), "122211111");
|
||||
|
||||
// Destination is aligned and multiple of alignment.
|
||||
// "111111111"
|
||||
trace.Clear();
|
||||
CopyDstAlignedBlocks<8, 4>(I(0), I(0), 9);
|
||||
EXPECT_STREQ(trace.Write(), "122211111");
|
||||
EXPECT_STREQ(trace.Read(), "122211111");
|
||||
}
|
||||
|
||||
TEST(LlvmLibcMemcpyUtilsTest, CopyAlignedBlocksMaxReloads) {
|
||||
auto &trace = GetTrace();
|
||||
for (size_t alignment = 0; alignment < 32; ++alignment) {
|
||||
for (size_t count = 64; count < 768; ++count) {
|
||||
trace.Clear();
|
||||
// We should never reload more than twice when copying from count = 2x32.
|
||||
CopySrcAlignedBlocks<32>(I(alignment), I(0), count);
|
||||
const char *const written = trace.Write();
|
||||
// First bytes are untouched.
|
||||
for (size_t i = 0; i < alignment; ++i)
|
||||
EXPECT_EQ(written[i], '0');
|
||||
// Next bytes are loaded once or twice but no more.
|
||||
for (size_t i = alignment; i < count; ++i) {
|
||||
EXPECT_GE(written[i], '1');
|
||||
EXPECT_LE(written[i], '2');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(LlvmLibcMemcpyUtilsTest, CopyAlignedBlocksWithAlignmentMaxReloads) {
|
||||
auto &trace = GetTrace();
|
||||
for (size_t alignment = 0; alignment < 32; ++alignment) {
|
||||
for (size_t count = 64; count < 768; ++count) {
|
||||
trace.Clear();
|
||||
// We should never reload more than twice when copying from count = 2x32.
|
||||
CopySrcAlignedBlocks<32, 16>(I(alignment), I(0), count);
|
||||
const char *const written = trace.Write();
|
||||
// First bytes are untouched.
|
||||
for (size_t i = 0; i < alignment; ++i)
|
||||
EXPECT_EQ(written[i], '0');
|
||||
// Next bytes are loaded once or twice but no more.
|
||||
for (size_t i = alignment; i < count; ++i) {
|
||||
EXPECT_GE(written[i], '1');
|
||||
EXPECT_LE(written[i], '2');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace __llvm_libc
|
|
@ -1,231 +0,0 @@
|
|||
//===-- Unittests for memory_utils ----------------------------------------===//
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define LLVM_LIBC_UNITTEST_OBSERVE 1
|
||||
|
||||
#include "src/string/memory_utils/elements.h"
|
||||
#include "utils/CPP/Array.h"
|
||||
#include "utils/CPP/ArrayRef.h"
|
||||
#include "utils/UnitTest/Test.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
namespace __llvm_libc {
|
||||
|
||||
static constexpr const size_t kMaxBuffer = 32;
|
||||
|
||||
struct BufferAccess : cpp::Array<char, kMaxBuffer + 1> {
|
||||
BufferAccess() { Reset(); }
|
||||
void Reset() {
|
||||
for (auto &value : *this)
|
||||
value = '0';
|
||||
this->operator[](kMaxBuffer) = '\0';
|
||||
}
|
||||
void Touch(ptrdiff_t offset, size_t size) {
|
||||
if (offset < 0)
|
||||
return;
|
||||
for (size_t i = 0; i < size; ++i)
|
||||
++(*this)[offset + i];
|
||||
}
|
||||
operator const char *() const { return this->data(); }
|
||||
};
|
||||
|
||||
struct Buffer {
|
||||
ptrdiff_t Offset(const char *ptr) const {
|
||||
const bool contained = ptr >= data.begin() && ptr < data.end();
|
||||
return contained ? ptr - data.begin() : -1;
|
||||
}
|
||||
void Reset() {
|
||||
reads.Reset();
|
||||
writes.Reset();
|
||||
}
|
||||
cpp::Array<char, kMaxBuffer> data;
|
||||
BufferAccess __attribute__((aligned(64))) reads;
|
||||
BufferAccess __attribute__((aligned(64))) writes;
|
||||
};
|
||||
|
||||
struct MemoryAccessObserver {
|
||||
void ObserveRead(const char *ptr, size_t size) {
|
||||
Buffer1.reads.Touch(Buffer1.Offset(ptr), size);
|
||||
Buffer2.reads.Touch(Buffer2.Offset(ptr), size);
|
||||
}
|
||||
|
||||
void ObserveWrite(const char *ptr, size_t size) {
|
||||
Buffer1.writes.Touch(Buffer1.Offset(ptr), size);
|
||||
Buffer2.writes.Touch(Buffer2.Offset(ptr), size);
|
||||
}
|
||||
|
||||
void Reset() {
|
||||
Buffer1.Reset();
|
||||
Buffer2.Reset();
|
||||
}
|
||||
|
||||
Buffer Buffer1;
|
||||
Buffer Buffer2;
|
||||
};
|
||||
|
||||
MemoryAccessObserver Observer;
|
||||
|
||||
template <size_t Size> struct TestingElement {
|
||||
static constexpr size_t kSize = Size;
|
||||
|
||||
static void Copy(char *__restrict dst, const char *__restrict src) {
|
||||
Observer.ObserveRead(src, kSize);
|
||||
Observer.ObserveWrite(dst, kSize);
|
||||
}
|
||||
|
||||
static bool Equals(const char *lhs, const char *rhs) {
|
||||
Observer.ObserveRead(lhs, kSize);
|
||||
Observer.ObserveRead(rhs, kSize);
|
||||
return true;
|
||||
}
|
||||
|
||||
static int ThreeWayCompare(const char *lhs, const char *rhs) {
|
||||
Observer.ObserveRead(lhs, kSize);
|
||||
Observer.ObserveRead(rhs, kSize);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void SplatSet(char *dst, const unsigned char value) {
|
||||
Observer.ObserveWrite(dst, kSize);
|
||||
}
|
||||
};
|
||||
|
||||
using Types = testing::TypeList<
|
||||
TestingElement<1>, // 1 Byte
|
||||
TestingElement<2>, // 2 Bytes
|
||||
TestingElement<4>, // 4 Bytes
|
||||
Repeated<TestingElement<2>, 3>, // 6 Bytes
|
||||
Chained<TestingElement<4>, TestingElement<2>, TestingElement<1>> // 7 Bytes
|
||||
>;
|
||||
|
||||
struct LlvmLibcTestAccessBase : public testing::Test {
|
||||
|
||||
template <typename HigherOrder, size_t Size, size_t Offset = 0>
|
||||
void checkOperations(const BufferAccess &expected) {
|
||||
static const BufferAccess untouched;
|
||||
|
||||
Observer.Reset();
|
||||
HigherOrder::Copy(dst_ptr() + Offset, src_ptr() + Offset, Size);
|
||||
ASSERT_STREQ(src().writes, untouched);
|
||||
ASSERT_STREQ(dst().reads, untouched);
|
||||
ASSERT_STREQ(src().reads, expected);
|
||||
ASSERT_STREQ(dst().writes, expected);
|
||||
Observer.Reset();
|
||||
HigherOrder::Equals(lhs_ptr() + Offset, rhs_ptr() + Offset, Size);
|
||||
ASSERT_STREQ(lhs().writes, untouched);
|
||||
ASSERT_STREQ(rhs().writes, untouched);
|
||||
ASSERT_STREQ(lhs().reads, expected);
|
||||
ASSERT_STREQ(rhs().reads, expected);
|
||||
Observer.Reset();
|
||||
HigherOrder::ThreeWayCompare(lhs_ptr() + Offset, rhs_ptr() + Offset, Size);
|
||||
ASSERT_STREQ(lhs().writes, untouched);
|
||||
ASSERT_STREQ(rhs().writes, untouched);
|
||||
ASSERT_STREQ(lhs().reads, expected);
|
||||
ASSERT_STREQ(rhs().reads, expected);
|
||||
Observer.Reset();
|
||||
HigherOrder::SplatSet(dst_ptr() + Offset, 5, Size);
|
||||
ASSERT_STREQ(src().reads, untouched);
|
||||
ASSERT_STREQ(src().writes, untouched);
|
||||
ASSERT_STREQ(dst().reads, untouched);
|
||||
ASSERT_STREQ(dst().writes, expected);
|
||||
}
|
||||
|
||||
void checkMaxAccess(const BufferAccess &expected, int max) {
|
||||
for (size_t i = 0; i < kMaxBuffer; ++i) {
|
||||
int value = (int)expected[i] - '0';
|
||||
if (value < 0 || value > max) {
|
||||
printf("expected no more than %d access, was '%s'\n", max,
|
||||
(const char *)expected);
|
||||
ASSERT_LE(value, max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
const Buffer &lhs() const { return Observer.Buffer1; }
|
||||
const Buffer &rhs() const { return Observer.Buffer2; }
|
||||
const Buffer &src() const { return Observer.Buffer2; }
|
||||
const Buffer &dst() const { return Observer.Buffer1; }
|
||||
Buffer &dst() { return Observer.Buffer1; }
|
||||
|
||||
char *dst_ptr() { return dst().data.begin(); }
|
||||
const char *src_ptr() { return src().data.begin(); }
|
||||
const char *lhs_ptr() { return lhs().data.begin(); }
|
||||
const char *rhs_ptr() { return rhs().data.begin(); }
|
||||
};
|
||||
|
||||
template <typename ParamType>
|
||||
struct LlvmLibcTestAccessTail : public LlvmLibcTestAccessBase {
|
||||
|
||||
void TearDown() override {
|
||||
static constexpr size_t Size = 10;
|
||||
|
||||
BufferAccess expected;
|
||||
expected.Touch(Size - ParamType::kSize, ParamType::kSize);
|
||||
|
||||
checkMaxAccess(expected, 1);
|
||||
checkOperations<Tail<ParamType>, Size>(expected);
|
||||
}
|
||||
};
|
||||
TYPED_TEST_F(LlvmLibcTestAccessTail, Operations, Types) {}
|
||||
|
||||
template <typename ParamType>
|
||||
struct LlvmLibcTestAccessHeadTail : public LlvmLibcTestAccessBase {
|
||||
void TearDown() override {
|
||||
static constexpr size_t Size = 10;
|
||||
|
||||
BufferAccess expected;
|
||||
expected.Touch(0, ParamType::kSize);
|
||||
expected.Touch(Size - ParamType::kSize, ParamType::kSize);
|
||||
|
||||
checkMaxAccess(expected, 2);
|
||||
checkOperations<HeadTail<ParamType>, Size>(expected);
|
||||
}
|
||||
};
|
||||
TYPED_TEST_F(LlvmLibcTestAccessHeadTail, Operations, Types) {}
|
||||
|
||||
template <typename ParamType>
|
||||
struct LlvmLibcTestAccessLoop : public LlvmLibcTestAccessBase {
|
||||
void TearDown() override {
|
||||
static constexpr size_t Size = 20;
|
||||
|
||||
BufferAccess expected;
|
||||
for (size_t i = 0; i < Size - ParamType::kSize; i += ParamType::kSize)
|
||||
expected.Touch(i, ParamType::kSize);
|
||||
expected.Touch(Size - ParamType::kSize, ParamType::kSize);
|
||||
|
||||
checkMaxAccess(expected, 2);
|
||||
checkOperations<Loop<ParamType>, Size>(expected);
|
||||
}
|
||||
};
|
||||
TYPED_TEST_F(LlvmLibcTestAccessLoop, Operations, Types) {}
|
||||
|
||||
template <typename ParamType>
|
||||
struct LlvmLibcTestAccessAlignedAccess : public LlvmLibcTestAccessBase {
|
||||
void TearDown() override {
|
||||
static constexpr size_t Size = 10;
|
||||
static constexpr size_t Offset = 2;
|
||||
using AlignmentT = TestingElement<4>;
|
||||
|
||||
BufferAccess expected;
|
||||
expected.Touch(Offset, AlignmentT::kSize);
|
||||
expected.Touch(AlignmentT::kSize, ParamType::kSize);
|
||||
expected.Touch(Offset + Size - ParamType::kSize, ParamType::kSize);
|
||||
|
||||
checkMaxAccess(expected, 3);
|
||||
checkOperations<Align<AlignmentT, Arg::_1>::Then<HeadTail<ParamType>>, Size,
|
||||
Offset>(expected);
|
||||
checkOperations<Align<AlignmentT, Arg::_2>::Then<HeadTail<ParamType>>, Size,
|
||||
Offset>(expected);
|
||||
}
|
||||
};
|
||||
TYPED_TEST_F(LlvmLibcTestAccessAlignedAccess, Operations, Types) {}
|
||||
|
||||
} // namespace __llvm_libc
|
Loading…
Reference in New Issue