[sanitizer] optimize internal_memset for the most performance-critical case (16-byte-aligned pointer, size a multiple of 16)

llvm-svn: 341420
2018-09-04 22:43:30 +00:00 · 2018-09-04 22:43:30 +00:00 · 470db78115
parent ec277a8278
commit 470db78115
1 changed files with 12 additions and 0 deletions
--- a/compiler-rt/lib/sanitizer_common/sanitizer_libc.cc
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_libc.cc
@@ -73,6 +73,18 @@ void *internal_memmove(void *dest, const void *src, uptr n) {
 }

 void *internal_memset(void* s, int c, uptr n) {
+  // Optimize for the most performance-critical case:
+  if ((reinterpret_cast<uptr>(s) % 16) == 0 && (n % 16) == 0) {
+    u64 *p = reinterpret_cast<u64*>(s);
+    u64 *e = p + n / 8;
+    u64 v = c;
+    v |= v << 8;
+    v |= v << 16;
+    v |= v << 32;
+    for (; p < e; p += 2)
+      p[0] = p[1] = v;
+    return s;
+  }
  // The next line prevents Clang from making a call to memset() instead of the
  // loop below.
  // FIXME: building the runtime with -ffreestanding is a better idea. However