forked from OSchip/llvm-project
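[AVX-512] Fix a duplicated column in the AVX512 execution dependency table that was preventing VMOVDQU32/VMOVDQA32 from being recognized. Also fix a bug in the guard code that is meant to prevent the execution dependency fix pass from turning operations on 32-bit integer element types into operations on 64-bit integer element types: the guard compared the wrong variable (the instruction's current domain instead of the requested domain).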
llvm-svn: 277327
This commit is contained in:
parent
ddc96cd33d
commit
c48c029610
|
@ -7318,22 +7318,22 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
|
|||
|
||||
static const uint16_t ReplaceableInstrsAVX512[][4] = {
|
||||
// Two integer columns for 64-bit and 32-bit elements.
|
||||
//PackedSingle PackedDouble PackedInt PackedInt
|
||||
{ X86::VMOVAPSZ128mr, X86::VMOVAPDZ128mr, X86::VMOVDQA64Z128mr, X86::VMOVDQA64Z128mr },
|
||||
{ X86::VMOVAPSZ128rm, X86::VMOVAPDZ128rm, X86::VMOVDQA64Z128rm, X86::VMOVDQA64Z128rm },
|
||||
{ X86::VMOVAPSZ128rr, X86::VMOVAPDZ128rr, X86::VMOVDQA64Z128rr, X86::VMOVDQA64Z128rr },
|
||||
{ X86::VMOVUPSZ128mr, X86::VMOVUPDZ128mr, X86::VMOVDQU64Z128mr, X86::VMOVDQU64Z128mr },
|
||||
{ X86::VMOVUPSZ128rm, X86::VMOVUPDZ128rm, X86::VMOVDQU64Z128rm, X86::VMOVDQU64Z128rm },
|
||||
{ X86::VMOVAPSZ256mr, X86::VMOVAPDZ256mr, X86::VMOVDQA64Z256mr, X86::VMOVDQA64Z256mr },
|
||||
{ X86::VMOVAPSZ256rm, X86::VMOVAPDZ256rm, X86::VMOVDQA64Z256rm, X86::VMOVDQA64Z256rm },
|
||||
{ X86::VMOVAPSZ256rr, X86::VMOVAPDZ256rr, X86::VMOVDQA64Z256rr, X86::VMOVDQA64Z256rr },
|
||||
{ X86::VMOVUPSZ256mr, X86::VMOVUPDZ256mr, X86::VMOVDQU64Z256mr, X86::VMOVDQU64Z256mr },
|
||||
{ X86::VMOVUPSZ256rm, X86::VMOVUPDZ256rm, X86::VMOVDQU64Z256rm, X86::VMOVDQU64Z256rm },
|
||||
{ X86::VMOVAPSZmr, X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA64Zmr },
|
||||
{ X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA64Zrm },
|
||||
{ X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA64Zrr },
|
||||
{ X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU64Zmr },
|
||||
{ X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU64Zrm },
|
||||
//PackedSingle PackedDouble PackedInt PackedInt
|
||||
{ X86::VMOVAPSZ128mr, X86::VMOVAPDZ128mr, X86::VMOVDQA64Z128mr, X86::VMOVDQA32Z128mr },
|
||||
{ X86::VMOVAPSZ128rm, X86::VMOVAPDZ128rm, X86::VMOVDQA64Z128rm, X86::VMOVDQA32Z128rm },
|
||||
{ X86::VMOVAPSZ128rr, X86::VMOVAPDZ128rr, X86::VMOVDQA64Z128rr, X86::VMOVDQA32Z128rr },
|
||||
{ X86::VMOVUPSZ128mr, X86::VMOVUPDZ128mr, X86::VMOVDQU64Z128mr, X86::VMOVDQU32Z128mr },
|
||||
{ X86::VMOVUPSZ128rm, X86::VMOVUPDZ128rm, X86::VMOVDQU64Z128rm, X86::VMOVDQU32Z128rm },
|
||||
{ X86::VMOVAPSZ256mr, X86::VMOVAPDZ256mr, X86::VMOVDQA64Z256mr, X86::VMOVDQA32Z256mr },
|
||||
{ X86::VMOVAPSZ256rm, X86::VMOVAPDZ256rm, X86::VMOVDQA64Z256rm, X86::VMOVDQA32Z256rm },
|
||||
{ X86::VMOVAPSZ256rr, X86::VMOVAPDZ256rr, X86::VMOVDQA64Z256rr, X86::VMOVDQA32Z256rr },
|
||||
{ X86::VMOVUPSZ256mr, X86::VMOVUPDZ256mr, X86::VMOVDQU64Z256mr, X86::VMOVDQU32Z256mr },
|
||||
{ X86::VMOVUPSZ256rm, X86::VMOVUPDZ256rm, X86::VMOVDQU64Z256rm, X86::VMOVDQU32Z256rm },
|
||||
{ X86::VMOVAPSZmr, X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA32Zmr },
|
||||
{ X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA32Zrm },
|
||||
{ X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA32Zrr },
|
||||
{ X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU32Zmr },
|
||||
{ X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU32Zrm },
|
||||
};
|
||||
|
||||
static const uint16_t ReplaceableInstrsAVX512DQ[][4] = {
|
||||
|
@ -7427,14 +7427,14 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
|
|||
assert(Subtarget.hasAVX512() && "Requires AVX-512");
|
||||
table = lookupAVX512(MI.getOpcode(), dom);
|
||||
// Don't change integer D (32-bit element) instructions to Q (64-bit element) instructions.
|
||||
if (table && dom == 3 && table[3] == MI.getOpcode())
|
||||
if (table && Domain == 3 && table[3] == MI.getOpcode())
|
||||
Domain = 4;
|
||||
}
|
||||
if (!table) { // try the AVX512DQ table
|
||||
assert((Subtarget.hasDQI() || Domain >=3) && "Requires AVX-512DQ");
|
||||
table = lookupAVX512DQ(MI.getOpcode(), dom);
|
||||
// Don't change integer D (32-bit element) instructions to Q (64-bit element) instructions.
|
||||
if (table && dom == 3 && table[3] == MI.getOpcode())
|
||||
if (table && Domain == 3 && table[3] == MI.getOpcode())
|
||||
Domain = 4;
|
||||
}
|
||||
assert(table && "Cannot change domain");
|
||||
|
|
|
@ -603,10 +603,30 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
|
|||
}
|
||||
|
||||
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
|
||||
; CHECK-LABEL: andd512fold:
|
||||
; CHECK: ## BB#0: ## %entry
|
||||
; CHECK-NEXT: vpandd (%rdi), %zmm0, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
; AVX512F-LABEL: andd512fold:
|
||||
; AVX512F: ## BB#0: ## %entry
|
||||
; AVX512F-NEXT: vpandd (%rdi), %zmm0, %zmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: andd512fold:
|
||||
; AVX512VL: ## BB#0: ## %entry
|
||||
; AVX512VL-NEXT: vpandd (%rdi), %zmm0, %zmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: andd512fold:
|
||||
; AVX512BW: ## BB#0: ## %entry
|
||||
; AVX512BW-NEXT: vpandd (%rdi), %zmm0, %zmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512DQ-LABEL: andd512fold:
|
||||
; AVX512DQ: ## BB#0: ## %entry
|
||||
; AVX512DQ-NEXT: vandps (%rdi), %zmm0, %zmm0
|
||||
; AVX512DQ-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: andd512fold:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%a = load <16 x i32>, <16 x i32>* %x, align 4
|
||||
%b = and <16 x i32> %y, %a
|
||||
|
|
|
@ -9,8 +9,8 @@ define void @bar__512(<16 x i32>* %var) #0 {
|
|||
; CHECK-NEXT: pushq %rbx
|
||||
; CHECK-NEXT: subq $112, %rsp
|
||||
; CHECK-NEXT: movq %rdi, %rbx
|
||||
; CHECK-NEXT: vmovdqu32 (%rbx), %zmm0
|
||||
; CHECK-NEXT: vmovdqu64 %zmm0, (%rsp) ## 64-byte Spill
|
||||
; CHECK-NEXT: vmovups (%rbx), %zmm0
|
||||
; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill
|
||||
; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %zmm1
|
||||
; CHECK-NEXT: vmovdqa32 %zmm1, (%rbx)
|
||||
; CHECK-NEXT: callq _Print__512
|
||||
|
|
|
@ -4,15 +4,10 @@
|
|||
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32
|
||||
|
||||
define <16 x i1> @test1() {
|
||||
; KNL-LABEL: test1:
|
||||
; KNL: ## BB#0:
|
||||
; KNL-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: test1:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxord %xmm0, %xmm0, %xmm0
|
||||
; SKX-NEXT: retq
|
||||
; ALL_X64-LABEL: test1:
|
||||
; ALL_X64: ## BB#0:
|
||||
; ALL_X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
|
||||
; ALL_X64-NEXT: retq
|
||||
;
|
||||
; KNL_X32-LABEL: test1:
|
||||
; KNL_X32: ## BB#0:
|
||||
|
|
|
@ -761,7 +761,7 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
|
|||
;
|
||||
; SKX-LABEL: sitofp_16i1_double:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxord %zmm2, %zmm2, %zmm2
|
||||
; SKX-NEXT: vxorpd %zmm2, %zmm2, %zmm2
|
||||
; SKX-NEXT: vcmpltpd %zmm1, %zmm2, %k0
|
||||
; SKX-NEXT: vcmpltpd %zmm0, %zmm2, %k1
|
||||
; SKX-NEXT: vpmovm2d %k1, %ymm0
|
||||
|
@ -787,7 +787,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
|
|||
;
|
||||
; SKX-LABEL: sitofp_8i1_double:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxord %zmm1, %zmm1, %zmm1
|
||||
; SKX-NEXT: vxorpd %zmm1, %zmm1, %zmm1
|
||||
; SKX-NEXT: vcmpltpd %zmm0, %zmm1, %k0
|
||||
; SKX-NEXT: vpmovm2d %k0, %ymm0
|
||||
; SKX-NEXT: vcvtdq2pd %ymm0, %zmm0
|
||||
|
@ -811,7 +811,7 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
|
|||
;
|
||||
; SKX-LABEL: sitofp_8i1_float:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
|
||||
; SKX-NEXT: vxorps %ymm1, %ymm1, %ymm1
|
||||
; SKX-NEXT: vcmpltps %ymm0, %ymm1, %k0
|
||||
; SKX-NEXT: vpmovm2d %k0, %ymm0
|
||||
; SKX-NEXT: vcvtdq2ps %ymm0, %ymm0
|
||||
|
@ -831,7 +831,7 @@ define <4 x float> @sitofp_4i1_float(<4 x float> %a) {
|
|||
;
|
||||
; SKX-LABEL: sitofp_4i1_float:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
|
||||
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0
|
||||
; SKX-NEXT: vpmovm2d %k0, %xmm0
|
||||
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
|
||||
|
@ -854,7 +854,7 @@ define <4 x double> @sitofp_4i1_double(<4 x double> %a) {
|
|||
;
|
||||
; SKX-LABEL: sitofp_4i1_double:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxord %ymm1, %ymm1, %ymm1
|
||||
; SKX-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
||||
; SKX-NEXT: vcmpltpd %ymm0, %ymm1, %k0
|
||||
; SKX-NEXT: vpmovm2d %k0, %xmm0
|
||||
; SKX-NEXT: vcvtdq2pd %xmm0, %ymm0
|
||||
|
@ -890,7 +890,7 @@ define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
|
|||
;
|
||||
; SKX-LABEL: sitofp_2i1_float:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
|
||||
; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; SKX-NEXT: vcmpltps %xmm0, %xmm1, %k0
|
||||
; SKX-NEXT: vpmovm2d %k0, %xmm0
|
||||
; SKX-NEXT: vcvtdq2ps %xmm0, %xmm0
|
||||
|
@ -911,7 +911,7 @@ define <2 x double> @sitofp_2i1_double(<2 x double> %a) {
|
|||
;
|
||||
; SKX-LABEL: sitofp_2i1_double:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxord %xmm1, %xmm1, %xmm1
|
||||
; SKX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
||||
; SKX-NEXT: vcmpltpd %xmm0, %xmm1, %k0
|
||||
; SKX-NEXT: vpmovm2q %k0, %xmm0
|
||||
; SKX-NEXT: vcvtqq2pd %xmm0, %xmm0
|
||||
|
|
|
@ -156,7 +156,7 @@ entry:
|
|||
define void @extract_subvector256_v2i64_store_lo(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
|
||||
; SKX-LABEL: extract_subvector256_v2i64_store_lo:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vmovdqu64 %xmm0, (%rdi)
|
||||
; SKX-NEXT: vmovups %xmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
|
||||
|
@ -168,7 +168,7 @@ entry:
|
|||
define void @extract_subvector256_v4i32_store_lo(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
|
||||
; SKX-LABEL: extract_subvector256_v4i32_store_lo:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
|
||||
; SKX-NEXT: vmovups %xmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
@ -180,7 +180,7 @@ entry:
|
|||
define void @extract_subvector256_v8i16_store_lo(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
|
||||
; SKX-LABEL: extract_subvector256_v8i16_store_lo:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
|
||||
; SKX-NEXT: vmovups %xmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
|
@ -192,7 +192,7 @@ entry:
|
|||
define void @extract_subvector256_v16i8_store_lo(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
|
||||
; SKX-LABEL: extract_subvector256_v16i8_store_lo:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
|
||||
; SKX-NEXT: vmovups %xmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
|
@ -228,7 +228,7 @@ entry:
|
|||
define void @extract_subvector512_v2i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
|
||||
; SKX-LABEL: extract_subvector512_v2i64_store_lo:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vmovdqu64 %xmm0, (%rdi)
|
||||
; SKX-NEXT: vmovups %xmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
|
||||
|
@ -240,7 +240,7 @@ entry:
|
|||
define void @extract_subvector512_v4i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
|
||||
; SKX-LABEL: extract_subvector512_v4i32_store_lo:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
|
||||
; SKX-NEXT: vmovups %xmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
@ -252,7 +252,7 @@ entry:
|
|||
define void @extract_subvector512_v8i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
|
||||
; SKX-LABEL: extract_subvector512_v8i16_store_lo:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
|
||||
; SKX-NEXT: vmovups %xmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%0 = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
|
@ -264,7 +264,7 @@ entry:
|
|||
define void @extract_subvector512_v16i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
|
||||
; SKX-LABEL: extract_subvector512_v16i8_store_lo:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vmovdqu32 %xmm0, (%rdi)
|
||||
; SKX-NEXT: vmovups %xmm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
|
@ -300,7 +300,7 @@ entry:
|
|||
define void @extract_subvector512_v4i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
|
||||
; SKX-LABEL: extract_subvector512_v4i64_store_lo:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vmovdqu64 %ymm0, (%rdi)
|
||||
; SKX-NEXT: vmovups %ymm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||
|
@ -312,7 +312,7 @@ entry:
|
|||
define void @extract_subvector512_v8i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
|
||||
; SKX-LABEL: extract_subvector512_v8i32_store_lo:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vmovdqu32 %ymm0, (%rdi)
|
||||
; SKX-NEXT: vmovups %ymm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
|
@ -324,7 +324,7 @@ entry:
|
|||
define void @extract_subvector512_v16i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
|
||||
; SKX-LABEL: extract_subvector512_v16i16_store_lo:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vmovdqu32 %ymm0, (%rdi)
|
||||
; SKX-NEXT: vmovups %ymm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
|
||||
|
@ -336,7 +336,7 @@ entry:
|
|||
define void @extract_subvector512_v32i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
|
||||
; SKX-LABEL: extract_subvector512_v32i8_store_lo:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vmovdqu32 %ymm0, (%rdi)
|
||||
; SKX-NEXT: vmovups %ymm0, (%rdi)
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
|
||||
|
|
|
@ -125,10 +125,15 @@ define <8 x i64> @orq_broadcast(<8 x i64> %a) nounwind {
|
|||
}
|
||||
|
||||
define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) {
|
||||
; ALL-LABEL: andd512fold:
|
||||
; ALL: ## BB#0: ## %entry
|
||||
; ALL-NEXT: vpandd (%rdi), %zmm0, %zmm0
|
||||
; ALL-NEXT: retq
|
||||
; KNL-LABEL: andd512fold:
|
||||
; KNL: ## BB#0: ## %entry
|
||||
; KNL-NEXT: vpandd (%rdi), %zmm0, %zmm0
|
||||
; KNL-NEXT: retq
|
||||
;
|
||||
; SKX-LABEL: andd512fold:
|
||||
; SKX: ## BB#0: ## %entry
|
||||
; SKX-NEXT: vandps (%rdi), %zmm0, %zmm0
|
||||
; SKX-NEXT: retq
|
||||
entry:
|
||||
%a = load <16 x i32>, <16 x i32>* %x, align 4
|
||||
%b = and <16 x i32> %y, %a
|
||||
|
|
|
@ -151,7 +151,7 @@ define <4 x i32> @test15(i32* %x) {
|
|||
define <16 x i32> @test16(i8 * %addr) {
|
||||
; CHECK-LABEL: test16:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqu32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7e,0x48,0x6f,0x07]
|
||||
; CHECK-NEXT: vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <16 x i32>*
|
||||
%res = load <16 x i32>, <16 x i32>* %vaddr, align 1
|
||||
|
@ -161,7 +161,7 @@ define <16 x i32> @test16(i8 * %addr) {
|
|||
define <16 x i32> @test17(i8 * %addr) {
|
||||
; CHECK-LABEL: test17:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa32 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x6f,0x07]
|
||||
; CHECK-NEXT: vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <16 x i32>*
|
||||
%res = load <16 x i32>, <16 x i32>* %vaddr, align 64
|
||||
|
@ -171,7 +171,7 @@ define <16 x i32> @test17(i8 * %addr) {
|
|||
define void @test18(i8 * %addr, <8 x i64> %data) {
|
||||
; CHECK-LABEL: test18:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x07]
|
||||
; CHECK-NEXT: vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <8 x i64>*
|
||||
store <8 x i64>%data, <8 x i64>* %vaddr, align 64
|
||||
|
@ -181,7 +181,7 @@ define void @test18(i8 * %addr, <8 x i64> %data) {
|
|||
define void @test19(i8 * %addr, <16 x i32> %data) {
|
||||
; CHECK-LABEL: test19:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqu32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x48,0x7f,0x07]
|
||||
; CHECK-NEXT: vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <16 x i32>*
|
||||
store <16 x i32>%data, <16 x i32>* %vaddr, align 1
|
||||
|
@ -191,7 +191,7 @@ define void @test19(i8 * %addr, <16 x i32> %data) {
|
|||
define void @test20(i8 * %addr, <16 x i32> %data) {
|
||||
; CHECK-LABEL: test20:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa32 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x48,0x7f,0x07]
|
||||
; CHECK-NEXT: vmovaps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <16 x i32>*
|
||||
store <16 x i32>%data, <16 x i32>* %vaddr, align 64
|
||||
|
@ -201,7 +201,7 @@ define void @test20(i8 * %addr, <16 x i32> %data) {
|
|||
define <8 x i64> @test21(i8 * %addr) {
|
||||
; CHECK-LABEL: test21:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
|
||||
; CHECK-NEXT: vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <8 x i64>*
|
||||
%res = load <8 x i64>, <8 x i64>* %vaddr, align 64
|
||||
|
@ -211,7 +211,7 @@ define <8 x i64> @test21(i8 * %addr) {
|
|||
define void @test22(i8 * %addr, <8 x i64> %data) {
|
||||
; CHECK-LABEL: test22:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqu64 %zmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x07]
|
||||
; CHECK-NEXT: vmovups %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <8 x i64>*
|
||||
store <8 x i64>%data, <8 x i64>* %vaddr, align 1
|
||||
|
@ -221,7 +221,7 @@ define void @test22(i8 * %addr, <8 x i64> %data) {
|
|||
define <8 x i64> @test23(i8 * %addr) {
|
||||
; CHECK-LABEL: test23:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
|
||||
; CHECK-NEXT: vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <8 x i64>*
|
||||
%res = load <8 x i64>, <8 x i64>* %vaddr, align 1
|
||||
|
|
|
@ -79,7 +79,7 @@ define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
|
|||
;
|
||||
; SKX-LABEL: test7:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
|
||||
; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1
|
||||
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
|
||||
; SKX-NEXT: retq
|
||||
|
@ -99,7 +99,7 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
|
|||
;
|
||||
; SKX-LABEL: test8:
|
||||
; SKX: ## BB#0:
|
||||
; SKX-NEXT: vpxord %xmm2, %xmm2, %xmm2
|
||||
; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2
|
||||
; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1
|
||||
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
|
||||
; SKX-NEXT: retq
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
define <8 x i32> @test_256_1(i8 * %addr) {
|
||||
; CHECK-LABEL: test_256_1:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07]
|
||||
; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <8 x i32>*
|
||||
%res = load <8 x i32>, <8 x i32>* %vaddr, align 1
|
||||
|
@ -14,7 +14,7 @@ define <8 x i32> @test_256_1(i8 * %addr) {
|
|||
define <8 x i32> @test_256_2(i8 * %addr) {
|
||||
; CHECK-LABEL: test_256_2:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07]
|
||||
; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <8 x i32>*
|
||||
%res = load <8 x i32>, <8 x i32>* %vaddr, align 32
|
||||
|
@ -24,7 +24,7 @@ define <8 x i32> @test_256_2(i8 * %addr) {
|
|||
define void @test_256_3(i8 * %addr, <4 x i64> %data) {
|
||||
; CHECK-LABEL: test_256_3:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x07]
|
||||
; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <4 x i64>*
|
||||
store <4 x i64>%data, <4 x i64>* %vaddr, align 32
|
||||
|
@ -34,7 +34,7 @@ define void @test_256_3(i8 * %addr, <4 x i64> %data) {
|
|||
define void @test_256_4(i8 * %addr, <8 x i32> %data) {
|
||||
; CHECK-LABEL: test_256_4:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqu32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x07]
|
||||
; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <8 x i32>*
|
||||
store <8 x i32>%data, <8 x i32>* %vaddr, align 1
|
||||
|
@ -44,7 +44,7 @@ define void @test_256_4(i8 * %addr, <8 x i32> %data) {
|
|||
define void @test_256_5(i8 * %addr, <8 x i32> %data) {
|
||||
; CHECK-LABEL: test_256_5:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa32 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x07]
|
||||
; CHECK-NEXT: vmovaps %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <8 x i32>*
|
||||
store <8 x i32>%data, <8 x i32>* %vaddr, align 32
|
||||
|
@ -54,7 +54,7 @@ define void @test_256_5(i8 * %addr, <8 x i32> %data) {
|
|||
define <4 x i64> @test_256_6(i8 * %addr) {
|
||||
; CHECK-LABEL: test_256_6:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07]
|
||||
; CHECK-NEXT: vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <4 x i64>*
|
||||
%res = load <4 x i64>, <4 x i64>* %vaddr, align 32
|
||||
|
@ -64,7 +64,7 @@ define <4 x i64> @test_256_6(i8 * %addr) {
|
|||
define void @test_256_7(i8 * %addr, <4 x i64> %data) {
|
||||
; CHECK-LABEL: test_256_7:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqu64 %ymm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x07]
|
||||
; CHECK-NEXT: vmovups %ymm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <4 x i64>*
|
||||
store <4 x i64>%data, <4 x i64>* %vaddr, align 1
|
||||
|
@ -74,7 +74,7 @@ define void @test_256_7(i8 * %addr, <4 x i64> %data) {
|
|||
define <4 x i64> @test_256_8(i8 * %addr) {
|
||||
; CHECK-LABEL: test_256_8:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07]
|
||||
; CHECK-NEXT: vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <4 x i64>*
|
||||
%res = load <4 x i64>, <4 x i64>* %vaddr, align 1
|
||||
|
@ -392,7 +392,7 @@ define <4 x double> @test_256_32(i8 * %addr, <4 x i64> %mask1) {
|
|||
define <4 x i32> @test_128_1(i8 * %addr) {
|
||||
; CHECK-LABEL: test_128_1:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07]
|
||||
; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <4 x i32>*
|
||||
%res = load <4 x i32>, <4 x i32>* %vaddr, align 1
|
||||
|
@ -402,7 +402,7 @@ define <4 x i32> @test_128_1(i8 * %addr) {
|
|||
define <4 x i32> @test_128_2(i8 * %addr) {
|
||||
; CHECK-LABEL: test_128_2:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07]
|
||||
; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <4 x i32>*
|
||||
%res = load <4 x i32>, <4 x i32>* %vaddr, align 16
|
||||
|
@ -412,7 +412,7 @@ define <4 x i32> @test_128_2(i8 * %addr) {
|
|||
define void @test_128_3(i8 * %addr, <2 x i64> %data) {
|
||||
; CHECK-LABEL: test_128_3:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x07]
|
||||
; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <2 x i64>*
|
||||
store <2 x i64>%data, <2 x i64>* %vaddr, align 16
|
||||
|
@ -422,7 +422,7 @@ define void @test_128_3(i8 * %addr, <2 x i64> %data) {
|
|||
define void @test_128_4(i8 * %addr, <4 x i32> %data) {
|
||||
; CHECK-LABEL: test_128_4:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqu32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x07]
|
||||
; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <4 x i32>*
|
||||
store <4 x i32>%data, <4 x i32>* %vaddr, align 1
|
||||
|
@ -432,7 +432,7 @@ define void @test_128_4(i8 * %addr, <4 x i32> %data) {
|
|||
define void @test_128_5(i8 * %addr, <4 x i32> %data) {
|
||||
; CHECK-LABEL: test_128_5:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa32 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x07]
|
||||
; CHECK-NEXT: vmovaps %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <4 x i32>*
|
||||
store <4 x i32>%data, <4 x i32>* %vaddr, align 16
|
||||
|
@ -442,7 +442,7 @@ define void @test_128_5(i8 * %addr, <4 x i32> %data) {
|
|||
define <2 x i64> @test_128_6(i8 * %addr) {
|
||||
; CHECK-LABEL: test_128_6:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07]
|
||||
; CHECK-NEXT: vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <2 x i64>*
|
||||
%res = load <2 x i64>, <2 x i64>* %vaddr, align 16
|
||||
|
@ -452,7 +452,7 @@ define <2 x i64> @test_128_6(i8 * %addr) {
|
|||
define void @test_128_7(i8 * %addr, <2 x i64> %data) {
|
||||
; CHECK-LABEL: test_128_7:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqu64 %xmm0, (%rdi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x07]
|
||||
; CHECK-NEXT: vmovups %xmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <2 x i64>*
|
||||
store <2 x i64>%data, <2 x i64>* %vaddr, align 1
|
||||
|
@ -462,7 +462,7 @@ define void @test_128_7(i8 * %addr, <2 x i64> %data) {
|
|||
define <2 x i64> @test_128_8(i8 * %addr) {
|
||||
; CHECK-LABEL: test_128_8:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07]
|
||||
; CHECK-NEXT: vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
|
||||
; CHECK-NEXT: retq ## encoding: [0xc3]
|
||||
%vaddr = bitcast i8* %addr to <2 x i64>*
|
||||
%res = load <2 x i64>, <2 x i64>* %vaddr, align 1
|
||||
|
|
|
@ -1131,7 +1131,7 @@ define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 {
|
|||
;
|
||||
; AVX512-LABEL: test_v4f32_fneg_fmul:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpxord %xmm2, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2
|
||||
; AVX512-NEXT: vfnmsub213ps %xmm2, %xmm1, %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%m = fmul nsz <4 x float> %x, %y
|
||||
|
@ -1154,7 +1154,7 @@ define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 {
|
|||
;
|
||||
; AVX512-LABEL: test_v4f64_fneg_fmul:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpxord %ymm2, %ymm2, %ymm2
|
||||
; AVX512-NEXT: vxorpd %ymm2, %ymm2, %ymm2
|
||||
; AVX512-NEXT: vfnmsub213pd %ymm2, %ymm1, %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%m = fmul nsz <4 x double> %x, %y
|
||||
|
|
|
@ -749,7 +749,7 @@ define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0
|
|||
;
|
||||
; AVX512-LABEL: test_v16f32_fneg_fmul:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
|
||||
; AVX512-NEXT: vxorps %zmm2, %zmm2, %zmm2
|
||||
; AVX512-NEXT: vfnmsub213ps %zmm2, %zmm1, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%m = fmul nsz <16 x float> %x, %y
|
||||
|
@ -774,7 +774,7 @@ define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
|
|||
;
|
||||
; AVX512-LABEL: test_v8f64_fneg_fmul:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
|
||||
; AVX512-NEXT: vxorpd %zmm2, %zmm2, %zmm2
|
||||
; AVX512-NEXT: vfnmsub213pd %zmm2, %zmm1, %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%m = fmul nsz <8 x double> %x, %y
|
||||
|
|
|
@ -419,13 +419,13 @@ define <16 x i32> @merge_16i32_i32_23u5uuuuuuuuuuuu(i32* %ptr) nounwind uwtable
|
|||
define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(i32* %ptr) nounwind uwtable noinline ssp {
|
||||
; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovdqu32 (%rdi), %zmm0
|
||||
; ALL-NEXT: vmovups (%rdi), %zmm0
|
||||
; ALL-NEXT: retq
|
||||
;
|
||||
; X32-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
|
||||
; X32-AVX512F: # BB#0:
|
||||
; X32-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||
; X32-AVX512F-NEXT: vmovdqu32 (%eax), %zmm0
|
||||
; X32-AVX512F-NEXT: vmovups (%eax), %zmm0
|
||||
; X32-AVX512F-NEXT: retl
|
||||
%ptr0 = getelementptr inbounds i32, i32* %ptr, i64 0
|
||||
%ptr3 = getelementptr inbounds i32, i32* %ptr, i64 3
|
||||
|
|
|
@ -59,7 +59,7 @@ define <4 x i32> @test_v4i32(<4 x i32>* %src) {
|
|||
;
|
||||
; AVX512VL-LABEL: test_v4i32:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vmovdqa32 (%rdi), %xmm0
|
||||
; AVX512VL-NEXT: vmovaps (%rdi), %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
%1 = load <4 x i32>, <4 x i32>* %src, align 16, !nontemporal !1
|
||||
ret <4 x i32> %1
|
||||
|
@ -229,7 +229,7 @@ define <8 x i32> @test_v8i32(<8 x i32>* %src) {
|
|||
;
|
||||
; AVX512VL-LABEL: test_v8i32:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vmovdqa32 (%rdi), %ymm0
|
||||
; AVX512VL-NEXT: vmovaps (%rdi), %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
%1 = load <8 x i32>, <8 x i32>* %src, align 32, !nontemporal !1
|
||||
ret <8 x i32> %1
|
||||
|
@ -1165,20 +1165,10 @@ define <4 x i32> @test_unaligned_v4i32(<4 x i32>* %src) {
|
|||
; AVX-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: test_unaligned_v4i32:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_unaligned_v4i32:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: test_unaligned_v4i32:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vmovdqu32 (%rdi), %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
; AVX512-LABEL: test_unaligned_v4i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = load <4 x i32>, <4 x i32>* %src, align 1, !nontemporal !1
|
||||
ret <4 x i32> %1
|
||||
}
|
||||
|
@ -1213,20 +1203,10 @@ define <2 x i64> @test_unaligned_v2i64(<2 x i64>* %src) {
|
|||
; AVX-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: test_unaligned_v2i64:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_unaligned_v2i64:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: test_unaligned_v2i64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vmovdqu64 (%rdi), %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
; AVX512-LABEL: test_unaligned_v2i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = load <2 x i64>, <2 x i64>* %src, align 1, !nontemporal !1
|
||||
ret <2 x i64> %1
|
||||
}
|
||||
|
@ -1242,20 +1222,10 @@ define <8 x i16> @test_unaligned_v8i16(<8 x i16>* %src) {
|
|||
; AVX-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: test_unaligned_v8i16:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_unaligned_v8i16:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: test_unaligned_v8i16:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vmovdqu64 (%rdi), %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
; AVX512-LABEL: test_unaligned_v8i16:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = load <8 x i16>, <8 x i16>* %src, align 1, !nontemporal !1
|
||||
ret <8 x i16> %1
|
||||
}
|
||||
|
@ -1271,20 +1241,10 @@ define <16 x i8> @test_unaligned_v16i8(<16 x i8>* %src) {
|
|||
; AVX-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: test_unaligned_v16i8:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_unaligned_v16i8:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: test_unaligned_v16i8:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vmovdqu64 (%rdi), %xmm0
|
||||
; AVX512VL-NEXT: retq
|
||||
; AVX512-LABEL: test_unaligned_v16i8:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vmovups (%rdi), %xmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = load <16 x i8>, <16 x i8>* %src, align 1, !nontemporal !1
|
||||
ret <16 x i8> %1
|
||||
}
|
||||
|
@ -1323,20 +1283,10 @@ define <8 x i32> @test_unaligned_v8i32(<8 x i32>* %src) {
|
|||
; AVX-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: test_unaligned_v8i32:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_unaligned_v8i32:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: test_unaligned_v8i32:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vmovdqu32 (%rdi), %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
; AVX512-LABEL: test_unaligned_v8i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = load <8 x i32>, <8 x i32>* %src, align 1, !nontemporal !1
|
||||
ret <8 x i32> %1
|
||||
}
|
||||
|
@ -1373,20 +1323,10 @@ define <4 x i64> @test_unaligned_v4i64(<4 x i64>* %src) {
|
|||
; AVX-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: test_unaligned_v4i64:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_unaligned_v4i64:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: test_unaligned_v4i64:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
; AVX512-LABEL: test_unaligned_v4i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = load <4 x i64>, <4 x i64>* %src, align 1, !nontemporal !1
|
||||
ret <4 x i64> %1
|
||||
}
|
||||
|
@ -1403,20 +1343,10 @@ define <16 x i16> @test_unaligned_v16i16(<16 x i16>* %src) {
|
|||
; AVX-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: test_unaligned_v16i16:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_unaligned_v16i16:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: test_unaligned_v16i16:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
; AVX512-LABEL: test_unaligned_v16i16:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = load <16 x i16>, <16 x i16>* %src, align 1, !nontemporal !1
|
||||
ret <16 x i16> %1
|
||||
}
|
||||
|
@ -1433,20 +1363,10 @@ define <32 x i8> @test_unaligned_v32i8(<32 x i8>* %src) {
|
|||
; AVX-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512F-LABEL: test_unaligned_v32i8:
|
||||
; AVX512F: # BB#0:
|
||||
; AVX512F-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512F-NEXT: retq
|
||||
;
|
||||
; AVX512BW-LABEL: test_unaligned_v32i8:
|
||||
; AVX512BW: # BB#0:
|
||||
; AVX512BW-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512BW-NEXT: retq
|
||||
;
|
||||
; AVX512VL-LABEL: test_unaligned_v32i8:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
|
||||
; AVX512VL-NEXT: retq
|
||||
; AVX512-LABEL: test_unaligned_v32i8:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = load <32 x i8>, <32 x i8>* %src, align 1, !nontemporal !1
|
||||
ret <32 x i8> %1
|
||||
}
|
||||
|
@ -1493,7 +1413,7 @@ define <16 x i32> @test_unaligned_v16i32(<16 x i32>* %src) {
|
|||
;
|
||||
; AVX512-LABEL: test_unaligned_v16i32:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vmovdqu32 (%rdi), %zmm0
|
||||
; AVX512-NEXT: vmovups (%rdi), %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = load <16 x i32>, <16 x i32>* %src, align 1, !nontemporal !1
|
||||
ret <16 x i32> %1
|
||||
|
@ -1539,7 +1459,7 @@ define <8 x i64> @test_unaligned_v8i64(<8 x i64>* %src) {
|
|||
;
|
||||
; AVX512-LABEL: test_unaligned_v8i64:
|
||||
; AVX512: # BB#0:
|
||||
; AVX512-NEXT: vmovdqu64 (%rdi), %zmm0
|
||||
; AVX512-NEXT: vmovups (%rdi), %zmm0
|
||||
; AVX512-NEXT: retq
|
||||
%1 = load <8 x i64>, <8 x i64>* %src, align 1, !nontemporal !1
|
||||
ret <8 x i64> %1
|
||||
|
@ -1573,8 +1493,8 @@ define <32 x i16> @test_unaligned_v32i16(<32 x i16>* %src) {
|
|||
;
|
||||
; AVX512VL-LABEL: test_unaligned_v32i16:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
|
||||
; AVX512VL-NEXT: vmovdqu64 32(%rdi), %ymm1
|
||||
; AVX512VL-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512VL-NEXT: vmovups 32(%rdi), %ymm1
|
||||
; AVX512VL-NEXT: retq
|
||||
%1 = load <32 x i16>, <32 x i16>* %src, align 1, !nontemporal !1
|
||||
ret <32 x i16> %1
|
||||
|
@ -1608,8 +1528,8 @@ define <64 x i8> @test_unaligned_v64i8(<64 x i8>* %src) {
|
|||
;
|
||||
; AVX512VL-LABEL: test_unaligned_v64i8:
|
||||
; AVX512VL: # BB#0:
|
||||
; AVX512VL-NEXT: vmovdqu64 (%rdi), %ymm0
|
||||
; AVX512VL-NEXT: vmovdqu64 32(%rdi), %ymm1
|
||||
; AVX512VL-NEXT: vmovups (%rdi), %ymm0
|
||||
; AVX512VL-NEXT: vmovups 32(%rdi), %ymm1
|
||||
; AVX512VL-NEXT: retq
|
||||
%1 = load <64 x i8>, <64 x i8>* %src, align 1, !nontemporal !1
|
||||
ret <64 x i8> %1
|
||||
|
|
|
@ -1632,15 +1632,10 @@ define <4 x i32> @foldv4i32() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv4i32:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} xmm0 = [23,0,32,24]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv4i32:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv4i32:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: foldv4i32:
|
||||
; X32-SSE: # BB#0:
|
||||
|
@ -1661,15 +1656,10 @@ define <4 x i32> @foldv4i32u() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv4i32u:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} xmm0 = [23,0,32,24]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv4i32u:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv4i32u:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [23,0,32,24]
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: foldv4i32u:
|
||||
; X32-SSE: # BB#0:
|
||||
|
@ -1690,15 +1680,10 @@ define <8 x i16> @foldv8i16() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv8i16:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv8i16:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv8i16:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: foldv8i16:
|
||||
; X32-SSE: # BB#0:
|
||||
|
@ -1719,15 +1704,10 @@ define <8 x i16> @foldv8i16u() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv8i16u:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv8i16u:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv8i16u:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [7,0,16,8,16,13,11,9]
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: foldv8i16u:
|
||||
; X32-SSE: # BB#0:
|
||||
|
@ -1748,15 +1728,10 @@ define <16 x i8> @foldv16i8() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv16i8:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv16i8:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv16i8:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: foldv16i8:
|
||||
; X32-SSE: # BB#0:
|
||||
|
@ -1777,15 +1752,10 @@ define <16 x i8> @foldv16i8u() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv16i8u:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv16i8u:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv16i8u:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2]
|
||||
; AVX512-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: foldv16i8u:
|
||||
; X32-SSE: # BB#0:
|
||||
|
|
|
@ -596,15 +596,10 @@ define <4 x i64> @foldv4i64() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv4i64:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [55,0,64,56]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv4i64:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv4i64:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
|
||||
; AVX512-NEXT: retq
|
||||
%out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
|
||||
ret <4 x i64> %out
|
||||
}
|
||||
|
@ -615,15 +610,10 @@ define <4 x i64> @foldv4i64u() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv4i64u:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [55,0,64,56]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv4i64u:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv4i64u:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [55,0,64,56]
|
||||
; AVX512-NEXT: retq
|
||||
%out = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
|
||||
ret <4 x i64> %out
|
||||
}
|
||||
|
@ -634,15 +624,10 @@ define <8 x i32> @foldv8i32() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv8i32:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv8i32:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv8i32:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
|
||||
; AVX512-NEXT: retq
|
||||
%out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
|
||||
ret <8 x i32> %out
|
||||
}
|
||||
|
@ -653,15 +638,10 @@ define <8 x i32> @foldv8i32u() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv8i32u:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa32 {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv8i32u:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv8i32u:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [23,0,32,24,0,29,27,25]
|
||||
; AVX512-NEXT: retq
|
||||
%out = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
|
||||
ret <8 x i32> %out
|
||||
}
|
||||
|
@ -672,15 +652,10 @@ define <16 x i16> @foldv16i16() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv16i16:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv16i16:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv16i16:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
|
||||
; AVX512-NEXT: retq
|
||||
%out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
|
||||
ret <16 x i16> %out
|
||||
}
|
||||
|
@ -691,15 +666,10 @@ define <16 x i16> @foldv16i16u() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv16i16u:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv16i16u:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv16i16u:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [7,0,16,8,16,13,11,9,0,8,15,14,13,12,11,10]
|
||||
; AVX512-NEXT: retq
|
||||
%out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
|
||||
ret <16 x i16> %out
|
||||
}
|
||||
|
@ -710,15 +680,10 @@ define <32 x i8> @foldv32i8() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv32i8:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv32i8:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv32i8:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
|
||||
; AVX512-NEXT: retq
|
||||
%out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
|
||||
ret <32 x i8> %out
|
||||
}
|
||||
|
@ -729,15 +694,10 @@ define <32 x i8> @foldv32i8u() nounwind {
|
|||
; AVX-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; AVX512VLCD-LABEL: foldv32i8u:
|
||||
; AVX512VLCD: ## BB#0:
|
||||
; AVX512VLCD-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
|
||||
; AVX512VLCD-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv32i8u:
|
||||
; AVX512CD: ## BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX512-LABEL: foldv32i8u:
|
||||
; AVX512: ## BB#0:
|
||||
; AVX512-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,5,3,1,0,0,7,6,5,4,3,2,1,0,8,8,0,0,0,0,0,0,0,0,6,5,5,1]
|
||||
; AVX512-NEXT: retq
|
||||
%out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
|
||||
ret <32 x i8> %out
|
||||
}
|
||||
|
|
|
@ -1435,25 +1435,10 @@ define <4 x i32> @foldv4i32() nounwind {
|
|||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: foldv4i32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv4i32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv4i32:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} xmm0 = [8,0,32,0]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv4i32:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX-LABEL: foldv4i32:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: foldv4i32:
|
||||
; X32-SSE: # BB#0:
|
||||
|
@ -1469,25 +1454,10 @@ define <4 x i32> @foldv4i32u() nounwind {
|
|||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,32,0]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: foldv4i32u:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv4i32u:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv4i32u:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} xmm0 = [8,0,32,0]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv4i32u:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX-LABEL: foldv4i32u:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,32,0]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: foldv4i32u:
|
||||
; X32-SSE: # BB#0:
|
||||
|
@ -1503,25 +1473,10 @@ define <8 x i16> @foldv8i16() nounwind {
|
|||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: foldv8i16:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv8i16:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv8i16:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv8i16:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX-LABEL: foldv8i16:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: foldv8i16:
|
||||
; X32-SSE: # BB#0:
|
||||
|
@ -1537,25 +1492,10 @@ define <8 x i16> @foldv8i16u() nounwind {
|
|||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: foldv8i16u:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv8i16u:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv8i16u:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv8i16u:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX-LABEL: foldv8i16u:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,16,0,16,0,3,3]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: foldv8i16u:
|
||||
; X32-SSE: # BB#0:
|
||||
|
@ -1571,25 +1511,10 @@ define <16 x i8> @foldv16i8() nounwind {
|
|||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: foldv16i8:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv16i8:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv16i8:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv16i8:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX-LABEL: foldv16i8:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: foldv16i8:
|
||||
; X32-SSE: # BB#0:
|
||||
|
@ -1605,25 +1530,10 @@ define <16 x i8> @foldv16i8u() nounwind {
|
|||
; SSE-NEXT: movaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; SSE-NEXT: retq
|
||||
;
|
||||
; AVX1-LABEL: foldv16i8u:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv16i8u:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv16i8u:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv16i8u:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX512CD-NEXT: retq
|
||||
; AVX-LABEL: foldv16i8u:
|
||||
; AVX: # BB#0:
|
||||
; AVX-NEXT: vmovaps {{.*#+}} xmm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX-NEXT: retq
|
||||
;
|
||||
; X32-SSE-LABEL: foldv16i8u:
|
||||
; X32-SSE: # BB#0:
|
||||
|
|
|
@ -713,193 +713,73 @@ define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
|
|||
}
|
||||
|
||||
define <4 x i64> @foldv4i64() nounwind {
|
||||
; AVX1-LABEL: foldv4i64:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv4i64:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv4i64:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,64,0]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv4i64:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
|
||||
; AVX512CD-NEXT: retq
|
||||
; ALL-LABEL: foldv4i64:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
|
||||
; ALL-NEXT: retq
|
||||
%out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 0)
|
||||
ret <4 x i64> %out
|
||||
}
|
||||
|
||||
define <4 x i64> @foldv4i64u() nounwind {
|
||||
; AVX1-LABEL: foldv4i64u:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv4i64u:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv4i64u:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,64,0]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv4i64u:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
|
||||
; AVX512CD-NEXT: retq
|
||||
; ALL-LABEL: foldv4i64u:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,64,0]
|
||||
; ALL-NEXT: retq
|
||||
%out = call <4 x i64> @llvm.cttz.v4i64(<4 x i64> <i64 256, i64 -1, i64 0, i64 255>, i1 -1)
|
||||
ret <4 x i64> %out
|
||||
}
|
||||
|
||||
define <8 x i32> @foldv8i32() nounwind {
|
||||
; AVX1-LABEL: foldv8i32:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv8i32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv8i32:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv8i32:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
|
||||
; AVX512CD-NEXT: retq
|
||||
; ALL-LABEL: foldv8i32:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
|
||||
; ALL-NEXT: retq
|
||||
%out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 0)
|
||||
ret <8 x i32> %out
|
||||
}
|
||||
|
||||
define <8 x i32> @foldv8i32u() nounwind {
|
||||
; AVX1-LABEL: foldv8i32u:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv8i32u:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv8i32u:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa32 {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv8i32u:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
|
||||
; AVX512CD-NEXT: retq
|
||||
; ALL-LABEL: foldv8i32u:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,32,0,16,0,3,3]
|
||||
; ALL-NEXT: retq
|
||||
%out = call <8 x i32> @llvm.cttz.v8i32(<8 x i32> <i32 256, i32 -1, i32 0, i32 255, i32 -65536, i32 7, i32 24, i32 88>, i1 -1)
|
||||
ret <8 x i32> %out
|
||||
}
|
||||
|
||||
define <16 x i16> @foldv16i16() nounwind {
|
||||
; AVX1-LABEL: foldv16i16:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv16i16:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv16i16:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv16i16:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX512CD-NEXT: retq
|
||||
; ALL-LABEL: foldv16i16:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; ALL-NEXT: retq
|
||||
%out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 0)
|
||||
ret <16 x i16> %out
|
||||
}
|
||||
|
||||
define <16 x i16> @foldv16i16u() nounwind {
|
||||
; AVX1-LABEL: foldv16i16u:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv16i16u:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv16i16u:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv16i16u:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; AVX512CD-NEXT: retq
|
||||
; ALL-LABEL: foldv16i16u:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,16,0,16,0,3,3,1,1,0,1,2,3,4,5]
|
||||
; ALL-NEXT: retq
|
||||
%out = call <16 x i16> @llvm.cttz.v16i16(<16 x i16> <i16 256, i16 -1, i16 0, i16 255, i16 -65536, i16 7, i16 24, i16 88, i16 -2, i16 254, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32>, i1 -1)
|
||||
ret <16 x i16> %out
|
||||
}
|
||||
|
||||
define <32 x i8> @foldv32i8() nounwind {
|
||||
; AVX1-LABEL: foldv32i8:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv32i8:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv32i8:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv32i8:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
|
||||
; AVX512CD-NEXT: retq
|
||||
; ALL-LABEL: foldv32i8:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
|
||||
; ALL-NEXT: retq
|
||||
%out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 0)
|
||||
ret <32 x i8> %out
|
||||
}
|
||||
|
||||
define <32 x i8> @foldv32i8u() nounwind {
|
||||
; AVX1-LABEL: foldv32i8u:
|
||||
; AVX1: # BB#0:
|
||||
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
|
||||
; AVX1-NEXT: retq
|
||||
;
|
||||
; AVX2-LABEL: foldv32i8u:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
|
||||
; AVX2-NEXT: retq
|
||||
;
|
||||
; AVX512CDVL-LABEL: foldv32i8u:
|
||||
; AVX512CDVL: # BB#0:
|
||||
; AVX512CDVL-NEXT: vmovdqa64 {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
|
||||
; AVX512CDVL-NEXT: retq
|
||||
;
|
||||
; AVX512CD-LABEL: foldv32i8u:
|
||||
; AVX512CD: # BB#0:
|
||||
; AVX512CD-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
|
||||
; AVX512CD-NEXT: retq
|
||||
; ALL-LABEL: foldv32i8u:
|
||||
; ALL: # BB#0:
|
||||
; ALL-NEXT: vmovaps {{.*#+}} ymm0 = [8,0,8,0,8,0,3,3,1,1,0,1,2,3,4,5,6,7,8,8,7,6,5,4,3,2,1,0,0,0,0,0]
|
||||
; ALL-NEXT: retq
|
||||
%out = call <32 x i8> @llvm.cttz.v32i8(<32 x i8> <i8 256, i8 -1, i8 0, i8 255, i8 -65536, i8 7, i8 24, i8 88, i8 -2, i8 254, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 128, i8 256, i8 -256, i8 -128, i8 -64, i8 -32, i8 -16, i8 -8, i8 -4, i8 -2, i8 -1, i8 3, i8 5, i8 7, i8 127>, i1 -1)
|
||||
ret <32 x i8> %out
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue