forked from OSchip/llvm-project
Prefer AVX512 memcpy when applicable
When AVX512 is available and the preferred vector width is 512 bits or more, we should prefer AVX512 for memcpy().

Bug report: https://bugs.llvm.org/show_bug.cgi?id=43240
Code review: https://reviews.llvm.org/D67874

llvm-svn: 372540
This commit is contained in:
parent
d8ac51ab8f
commit
a7a515cb77
|
@ -2149,6 +2149,11 @@ EVT X86TargetLowering::getOptimalMemOpType(
|
|||
if (Size >= 16 && (!Subtarget.isUnalignedMem16Slow() ||
|
||||
((DstAlign == 0 || DstAlign >= 16) &&
|
||||
(SrcAlign == 0 || SrcAlign >= 16)))) {
|
||||
// FIXME: Check if unaligned 64-byte accesses are slow.
|
||||
if (Size >= 64 && Subtarget.hasAVX512() &&
|
||||
(Subtarget.getPreferVectorWidth() >= 512)) {
|
||||
return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
|
||||
}
|
||||
// FIXME: Check if unaligned 32-byte accesses are slow.
|
||||
if (Size >= 32 && Subtarget.hasAVX() &&
|
||||
(Subtarget.getPreferVectorWidth() >= 256)) {
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=DARWIN
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s -check-prefix=LINUX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake | FileCheck %s -check-prefix=LINUX-SKL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx | FileCheck %s -check-prefix=LINUX-SKX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s -check-prefix=LINUX-KNL
|
||||
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=DARWIN
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s -check-prefix=LINUX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake | FileCheck %s -check-prefix=LINUX-SKL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx | FileCheck %s -check-prefix=LINUX-SKX
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s -check-prefix=LINUX-KNL
|
||||
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512bw | FileCheck %s -check-prefix=LINUX-AVX512BW
|
||||
|
||||
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
|
||||
|
@ -124,10 +124,8 @@ define void @test3(i8* nocapture %A, i8* nocapture %B) nounwind optsize noredzon
|
|||
;
|
||||
; LINUX-KNL-LABEL: test3:
|
||||
; LINUX-KNL: # %bb.0: # %entry
|
||||
; LINUX-KNL-NEXT: vmovups (%rsi), %ymm0
|
||||
; LINUX-KNL-NEXT: vmovups 32(%rsi), %ymm1
|
||||
; LINUX-KNL-NEXT: vmovups %ymm1, 32(%rdi)
|
||||
; LINUX-KNL-NEXT: vmovups %ymm0, (%rdi)
|
||||
; LINUX-KNL-NEXT: vmovups (%rsi), %zmm0
|
||||
; LINUX-KNL-NEXT: vmovups %zmm0, (%rdi)
|
||||
; LINUX-KNL-NEXT: retq
|
||||
;
|
||||
; LINUX-AVX512BW-LABEL: test3:
|
||||
|
@ -174,10 +172,8 @@ define void @test3_minsize(i8* nocapture %A, i8* nocapture %B) nounwind minsize
|
|||
;
|
||||
; LINUX-KNL-LABEL: test3_minsize:
|
||||
; LINUX-KNL: # %bb.0:
|
||||
; LINUX-KNL-NEXT: vmovups (%rsi), %ymm0
|
||||
; LINUX-KNL-NEXT: vmovups 32(%rsi), %ymm1
|
||||
; LINUX-KNL-NEXT: vmovups %ymm1, 32(%rdi)
|
||||
; LINUX-KNL-NEXT: vmovups %ymm0, (%rdi)
|
||||
; LINUX-KNL-NEXT: vmovups (%rsi), %zmm0
|
||||
; LINUX-KNL-NEXT: vmovups %zmm0, (%rdi)
|
||||
; LINUX-KNL-NEXT: retq
|
||||
;
|
||||
; LINUX-AVX512BW-LABEL: test3_minsize:
|
||||
|
@ -223,10 +219,8 @@ define void @test3_minsize_optsize(i8* nocapture %A, i8* nocapture %B) nounwind
|
|||
;
|
||||
; LINUX-KNL-LABEL: test3_minsize_optsize:
|
||||
; LINUX-KNL: # %bb.0:
|
||||
; LINUX-KNL-NEXT: vmovups (%rsi), %ymm0
|
||||
; LINUX-KNL-NEXT: vmovups 32(%rsi), %ymm1
|
||||
; LINUX-KNL-NEXT: vmovups %ymm1, 32(%rdi)
|
||||
; LINUX-KNL-NEXT: vmovups %ymm0, (%rdi)
|
||||
; LINUX-KNL-NEXT: vmovups (%rsi), %zmm0
|
||||
; LINUX-KNL-NEXT: vmovups %zmm0, (%rdi)
|
||||
; LINUX-KNL-NEXT: retq
|
||||
;
|
||||
; LINUX-AVX512BW-LABEL: test3_minsize_optsize:
|
||||
|
@ -301,10 +295,8 @@ define void @test4(i8* nocapture %A, i8* nocapture %B) nounwind noredzone {
|
|||
;
|
||||
; LINUX-KNL-LABEL: test4:
|
||||
; LINUX-KNL: # %bb.0: # %entry
|
||||
; LINUX-KNL-NEXT: vmovups (%rsi), %ymm0
|
||||
; LINUX-KNL-NEXT: vmovups 32(%rsi), %ymm1
|
||||
; LINUX-KNL-NEXT: vmovups %ymm1, 32(%rdi)
|
||||
; LINUX-KNL-NEXT: vmovups %ymm0, (%rdi)
|
||||
; LINUX-KNL-NEXT: vmovups (%rsi), %zmm0
|
||||
; LINUX-KNL-NEXT: vmovups %zmm0, (%rdi)
|
||||
; LINUX-KNL-NEXT: retq
|
||||
;
|
||||
; LINUX-AVX512BW-LABEL: test4:
|
||||
|
|
|
@ -1,11 +1,14 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse | FileCheck %s --check-prefix=SSE
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse | FileCheck %s --check-prefix=SSE
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2 | FileCheck %s --check-prefix=SSE
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=sse2,-slow-unaligned-mem-16 | FileCheck %s --check-prefix=SSE2FAST
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512f -mattr=+prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512-ymm
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512bw -mattr=+prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512-ymm
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512dq -mattr=+prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512-ymm
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512f -mattr=-prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
|
||||
; RUN: llc -mtriple=x86_64-unknown-unknown < %s -mattr=avx512bw -mattr=-prefer-256-bit | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
|
||||
|
||||
; https://llvm.org/bugs/show_bug.cgi?id=27100
|
||||
|
||||
|
@ -82,13 +85,44 @@ define void @memset_64_nonzero_bytes(i8* %x) {
|
|||
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
|
||||
; SSE2FAST-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: memset_64_nonzero_bytes:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
|
||||
; AVX-NEXT: vmovups %ymm0, (%rdi)
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
; AVX1-LABEL: memset_64_nonzero_bytes:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
|
||||
; AVX1-NEXT: vmovups %ymm0, (%rdi)
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: memset_64_nonzero_bytes:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
|
||||
; AVX2-NEXT: vmovups %ymm0, (%rdi)
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-ymm-LABEL: memset_64_nonzero_bytes:
|
||||
; AVX512-ymm: # %bb.0:
|
||||
; AVX512-ymm-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, 32(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, (%rdi)
|
||||
; AVX512-ymm-NEXT: vzeroupper
|
||||
; AVX512-ymm-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: memset_64_nonzero_bytes:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} zmm0 = [707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378]
|
||||
; AVX512F-NEXT: vmovups %zmm0, (%rdi)
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: memset_64_nonzero_bytes:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovaps {{.*#+}} zmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX512BW-NEXT: vmovups %zmm0, (%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
; AVX512NW-NEXT: retq
|
||||
%call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 64, i64 -1)
|
||||
ret void
|
||||
}
|
||||
|
@ -128,15 +162,51 @@ define void @memset_128_nonzero_bytes(i8* %x) {
|
|||
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
|
||||
; SSE2FAST-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: memset_128_nonzero_bytes:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX-NEXT: vmovups %ymm0, 96(%rdi)
|
||||
; AVX-NEXT: vmovups %ymm0, 64(%rdi)
|
||||
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
|
||||
; AVX-NEXT: vmovups %ymm0, (%rdi)
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
; AVX1-LABEL: memset_128_nonzero_bytes:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
|
||||
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
|
||||
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
|
||||
; AVX1-NEXT: vmovups %ymm0, (%rdi)
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: memset_128_nonzero_bytes:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
|
||||
; AVX2-NEXT: vmovups %ymm0, 64(%rdi)
|
||||
; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
|
||||
; AVX2-NEXT: vmovups %ymm0, (%rdi)
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-ymm-LABEL: memset_128_nonzero_bytes:
|
||||
; AVX512-ymm: # %bb.0:
|
||||
; AVX512-ymm-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, 96(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, 64(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, 32(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, (%rdi)
|
||||
; AVX512-ymm-NEXT: vzeroupper
|
||||
; AVX512-ymm-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: memset_128_nonzero_bytes:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} zmm0 = [707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378]
|
||||
; AVX512F-NEXT: vmovups %zmm0, 64(%rdi)
|
||||
; AVX512F-NEXT: vmovups %zmm0, (%rdi)
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: memset_128_nonzero_bytes:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovaps {{.*#+}} zmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX512BW-NEXT: vmovups %zmm0, 64(%rdi)
|
||||
; AVX512BW-NEXT: vmovups %zmm0, (%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
%call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 128, i64 -1)
|
||||
ret void
|
||||
}
|
||||
|
@ -174,19 +244,67 @@ define void @memset_256_nonzero_bytes(i8* %x) {
|
|||
; SSE2FAST-NEXT: movups %xmm0, (%rdi)
|
||||
; SSE2FAST-NEXT: retq
|
||||
;
|
||||
; AVX-LABEL: memset_256_nonzero_bytes:
|
||||
; AVX: # %bb.0:
|
||||
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX-NEXT: vmovups %ymm0, 224(%rdi)
|
||||
; AVX-NEXT: vmovups %ymm0, 192(%rdi)
|
||||
; AVX-NEXT: vmovups %ymm0, 160(%rdi)
|
||||
; AVX-NEXT: vmovups %ymm0, 128(%rdi)
|
||||
; AVX-NEXT: vmovups %ymm0, 96(%rdi)
|
||||
; AVX-NEXT: vmovups %ymm0, 64(%rdi)
|
||||
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
|
||||
; AVX-NEXT: vmovups %ymm0, (%rdi)
|
||||
; AVX-NEXT: vzeroupper
|
||||
; AVX-NEXT: retq
|
||||
; AVX1-LABEL: memset_256_nonzero_bytes:
|
||||
; AVX1: # %bb.0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX1-NEXT: vmovups %ymm0, 224(%rdi)
|
||||
; AVX1-NEXT: vmovups %ymm0, 192(%rdi)
|
||||
; AVX1-NEXT: vmovups %ymm0, 160(%rdi)
|
||||
; AVX1-NEXT: vmovups %ymm0, 128(%rdi)
|
||||
; AVX1-NEXT: vmovups %ymm0, 96(%rdi)
|
||||
; AVX1-NEXT: vmovups %ymm0, 64(%rdi)
|
||||
; AVX1-NEXT: vmovups %ymm0, 32(%rdi)
|
||||
; AVX1-NEXT: vmovups %ymm0, (%rdi)
|
||||
; AVX1-NEXT: vzeroupper
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: memset_256_nonzero_bytes:
|
||||
; AVX2: # %bb.0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX2-NEXT: vmovups %ymm0, 224(%rdi)
|
||||
; AVX2-NEXT: vmovups %ymm0, 192(%rdi)
|
||||
; AVX2-NEXT: vmovups %ymm0, 160(%rdi)
|
||||
; AVX2-NEXT: vmovups %ymm0, 128(%rdi)
|
||||
; AVX2-NEXT: vmovups %ymm0, 96(%rdi)
|
||||
; AVX2-NEXT: vmovups %ymm0, 64(%rdi)
|
||||
; AVX2-NEXT: vmovups %ymm0, 32(%rdi)
|
||||
; AVX2-NEXT: vmovups %ymm0, (%rdi)
|
||||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-ymm-LABEL: memset_256_nonzero_bytes:
|
||||
; AVX512-ymm: # %bb.0:
|
||||
; AVX512-ymm-NEXT: vmovaps {{.*#+}} ymm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, 224(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, 192(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, 160(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, 128(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, 96(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, 64(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, 32(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovups %ymm0, (%rdi)
|
||||
; AVX512-ymm-NEXT: vzeroupper
|
||||
; AVX512-ymm-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: memset_256_nonzero_bytes:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: vbroadcastss {{.*#+}} zmm0 = [707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378,707406378]
|
||||
; AVX512F-NEXT: vmovups %zmm0, 192(%rdi)
|
||||
; AVX512F-NEXT: vmovups %zmm0, 128(%rdi)
|
||||
; AVX512F-NEXT: vmovups %zmm0, 64(%rdi)
|
||||
; AVX512F-NEXT: vmovups %zmm0, (%rdi)
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: memset_256_nonzero_bytes:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vmovaps {{.*#+}} zmm0 = [42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42,42]
|
||||
; AVX512BW-NEXT: vmovups %zmm0, 192(%rdi)
|
||||
; AVX512BW-NEXT: vmovups %zmm0, 128(%rdi)
|
||||
; AVX512BW-NEXT: vmovups %zmm0, 64(%rdi)
|
||||
; AVX512BW-NEXT: vmovups %zmm0, (%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
%call = tail call i8* @__memset_chk(i8* %x, i32 42, i64 256, i64 -1)
|
||||
ret void
|
||||
}
|
||||
|
@ -340,14 +458,30 @@ define void @memset_64_nonconst_bytes(i8* %x, i8 %c) {
|
|||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: memset_64_nonconst_bytes:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd %esi, %xmm0
|
||||
; AVX512-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512-NEXT: vmovdqu %ymm0, 32(%rdi)
|
||||
; AVX512-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512-ymm-LABEL: memset_64_nonconst_bytes:
|
||||
; AVX512-ymm: # %bb.0:
|
||||
; AVX512-ymm-NEXT: vmovd %esi, %xmm0
|
||||
; AVX512-ymm-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, 32(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; AVX512-ymm-NEXT: vzeroupper
|
||||
; AVX512-ymm-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: memset_64_nonconst_bytes:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: movzbl %sil, %eax
|
||||
; AVX512F-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
|
||||
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0
|
||||
; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: memset_64_nonconst_bytes:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpbroadcastb %esi, %zmm0
|
||||
; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 64, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
@ -417,16 +551,34 @@ define void @memset_128_nonconst_bytes(i8* %x, i8 %c) {
|
|||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: memset_128_nonconst_bytes:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd %esi, %xmm0
|
||||
; AVX512-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512-NEXT: vmovdqu %ymm0, 96(%rdi)
|
||||
; AVX512-NEXT: vmovdqu %ymm0, 64(%rdi)
|
||||
; AVX512-NEXT: vmovdqu %ymm0, 32(%rdi)
|
||||
; AVX512-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512-ymm-LABEL: memset_128_nonconst_bytes:
|
||||
; AVX512-ymm: # %bb.0:
|
||||
; AVX512-ymm-NEXT: vmovd %esi, %xmm0
|
||||
; AVX512-ymm-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, 96(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, 64(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, 32(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; AVX512-ymm-NEXT: vzeroupper
|
||||
; AVX512-ymm-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: memset_128_nonconst_bytes:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: movzbl %sil, %eax
|
||||
; AVX512F-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
|
||||
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0
|
||||
; AVX512F-NEXT: vmovdqu64 %zmm0, 64(%rdi)
|
||||
; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: memset_128_nonconst_bytes:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpbroadcastb %esi, %zmm0
|
||||
; AVX512BW-NEXT: vmovdqu64 %zmm0, 64(%rdi)
|
||||
; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 128, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
@ -493,20 +645,42 @@ define void @memset_256_nonconst_bytes(i8* %x, i8 %c) {
|
|||
; AVX2-NEXT: vzeroupper
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512-LABEL: memset_256_nonconst_bytes:
|
||||
; AVX512: # %bb.0:
|
||||
; AVX512-NEXT: vmovd %esi, %xmm0
|
||||
; AVX512-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512-NEXT: vmovdqu %ymm0, 224(%rdi)
|
||||
; AVX512-NEXT: vmovdqu %ymm0, 192(%rdi)
|
||||
; AVX512-NEXT: vmovdqu %ymm0, 160(%rdi)
|
||||
; AVX512-NEXT: vmovdqu %ymm0, 128(%rdi)
|
||||
; AVX512-NEXT: vmovdqu %ymm0, 96(%rdi)
|
||||
; AVX512-NEXT: vmovdqu %ymm0, 64(%rdi)
|
||||
; AVX512-NEXT: vmovdqu %ymm0, 32(%rdi)
|
||||
; AVX512-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; AVX512-NEXT: vzeroupper
|
||||
; AVX512-NEXT: retq
|
||||
; AVX512-ymm-LABEL: memset_256_nonconst_bytes:
|
||||
; AVX512-ymm: # %bb.0:
|
||||
; AVX512-ymm-NEXT: vmovd %esi, %xmm0
|
||||
; AVX512-ymm-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, 224(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, 192(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, 160(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, 128(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, 96(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, 64(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, 32(%rdi)
|
||||
; AVX512-ymm-NEXT: vmovdqu %ymm0, (%rdi)
|
||||
; AVX512-ymm-NEXT: vzeroupper
|
||||
; AVX512-ymm-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: memset_256_nonconst_bytes:
|
||||
; AVX512F: # %bb.0:
|
||||
; AVX512F-NEXT: movzbl %sil, %eax
|
||||
; AVX512F-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
|
||||
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0
|
||||
; AVX512F-NEXT: vmovdqu64 %zmm0, 192(%rdi)
|
||||
; AVX512F-NEXT: vmovdqu64 %zmm0, 128(%rdi)
|
||||
; AVX512F-NEXT: vmovdqu64 %zmm0, 64(%rdi)
|
||||
; AVX512F-NEXT: vmovdqu64 %zmm0, (%rdi)
|
||||
; AVX512F-NEXT: vzeroupper
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: memset_256_nonconst_bytes:
|
||||
; AVX512BW: # %bb.0:
|
||||
; AVX512BW-NEXT: vpbroadcastb %esi, %zmm0
|
||||
; AVX512BW-NEXT: vmovdqu64 %zmm0, 192(%rdi)
|
||||
; AVX512BW-NEXT: vmovdqu64 %zmm0, 128(%rdi)
|
||||
; AVX512BW-NEXT: vmovdqu64 %zmm0, 64(%rdi)
|
||||
; AVX512BW-NEXT: vmovdqu64 %zmm0, (%rdi)
|
||||
; AVX512BW-NEXT: vzeroupper
|
||||
; AVX512BW-NEXT: retq
|
||||
tail call void @llvm.memset.p0i8.i64(i8* %x, i8 %c, i64 256, i1 false)
|
||||
ret void
|
||||
}
|
||||
|
|
|
@ -752,8 +752,7 @@ define void @memset_64(i8* %a) nounwind {
|
|||
; KNL-LABEL: memset_64:
|
||||
; KNL: # %bb.0: # %entry
|
||||
; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; KNL-NEXT: vmovups %ymm0, 32(%rdi)
|
||||
; KNL-NEXT: vmovups %ymm0, (%rdi)
|
||||
; KNL-NEXT: vmovups %zmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
entry:
|
||||
call void @llvm.memset.p0i8.i64(i8* %a, i8 0, i64 64, i1 false)
|
||||
|
@ -819,8 +818,7 @@ define void @memset_64_align64(i8* %a) nounwind {
|
|||
; KNL-LABEL: memset_64_align64:
|
||||
; KNL: # %bb.0: # %entry
|
||||
; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; KNL-NEXT: vmovaps %ymm0, 32(%rdi)
|
||||
; KNL-NEXT: vmovaps %ymm0, (%rdi)
|
||||
; KNL-NEXT: vmovaps %zmm0, (%rdi)
|
||||
; KNL-NEXT: retq
|
||||
entry:
|
||||
call void @llvm.memset.p0i8.i64(i8* align 64 %a, i8 0, i64 64, i1 false)
|
||||
|
|
Loading…
Reference in New Issue