[AVX-512] Teach the two-address instruction pass to replace masked move instructions with blendm instructions when it's beneficial.

Isel now selects masked move instructions for vselect instead of blendm, but sometimes it is beneficial to register allocation to remove the tied-register constraint by using blendm instructions instead. This also picks up cases where the masked move was created due to a masked load intrinsic.

Differential Revision: https://reviews.llvm.org/D28454

llvm-svn: 292005
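As a hedged illustration assembled from this commit's own test updates (not an authoritative description of the pass), consider the vselect pattern from test1 in avx512-vec-cmp.ll:

  define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
    %mask = fcmp ole <16 x float> %x, %y
    %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
    ret <16 x float> %max
  }

Before this patch the masked move kept its tied passthru operand, which could force an extra register copy:

  vcmpleps %zmm1, %zmm0, %k1
  vmovaps %zmm0, %zmm1 {%k1}   ; destination tied to the passthru register
  vmovaps %zmm1, %zmm0

After conversion to the three-address blendm form the copy disappears:

  vcmpleps %zmm1, %zmm0, %k1
  vblendmps %zmm0, %zmm1, %zmm0 {%k1}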
commit 63e2cd6caa (parent 09b7e0f01d)
@@ -2738,7 +2738,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                     [(set _.RC:$dst, (_.VT (bitconvert (ld_frag addr:$src))))],
                     _.ExeDomain>, EVEX;

  let Constraints = "$src0 = $dst" in {
  let Constraints = "$src0 = $dst", isConvertibleToThreeAddress = 1 in {
  def rrk : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst),
                     (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1),
                     !strconcat(OpcodeStr, "\t{$src1, ${dst} {${mask}}|",
@@ -4044,6 +4044,131 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
        BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r)).add(Dest).add(Src),
        MI.getOperand(2));
    break;

  case X86::VMOVDQU8Z128rmk:
  case X86::VMOVDQU8Z256rmk:
  case X86::VMOVDQU8Zrmk:
  case X86::VMOVDQU16Z128rmk:
  case X86::VMOVDQU16Z256rmk:
  case X86::VMOVDQU16Zrmk:
  case X86::VMOVDQU32Z128rmk: case X86::VMOVDQA32Z128rmk:
  case X86::VMOVDQU32Z256rmk: case X86::VMOVDQA32Z256rmk:
  case X86::VMOVDQU32Zrmk: case X86::VMOVDQA32Zrmk:
  case X86::VMOVDQU64Z128rmk: case X86::VMOVDQA64Z128rmk:
  case X86::VMOVDQU64Z256rmk: case X86::VMOVDQA64Z256rmk:
  case X86::VMOVDQU64Zrmk: case X86::VMOVDQA64Zrmk:
  case X86::VMOVUPDZ128rmk: case X86::VMOVAPDZ128rmk:
  case X86::VMOVUPDZ256rmk: case X86::VMOVAPDZ256rmk:
  case X86::VMOVUPDZrmk: case X86::VMOVAPDZrmk:
  case X86::VMOVUPSZ128rmk: case X86::VMOVAPSZ128rmk:
  case X86::VMOVUPSZ256rmk: case X86::VMOVAPSZ256rmk:
  case X86::VMOVUPSZrmk: case X86::VMOVAPSZrmk: {
    unsigned Opc;
    switch (MIOpc) {
    default: llvm_unreachable("Unreachable!");
    case X86::VMOVDQU8Z128rmk: Opc = X86::VPBLENDMBZ128rmk; break;
    case X86::VMOVDQU8Z256rmk: Opc = X86::VPBLENDMBZ256rmk; break;
    case X86::VMOVDQU8Zrmk: Opc = X86::VPBLENDMBZrmk; break;
    case X86::VMOVDQU16Z128rmk: Opc = X86::VPBLENDMWZ128rmk; break;
    case X86::VMOVDQU16Z256rmk: Opc = X86::VPBLENDMWZ256rmk; break;
    case X86::VMOVDQU16Zrmk: Opc = X86::VPBLENDMWZrmk; break;
    case X86::VMOVDQU32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
    case X86::VMOVDQU32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
    case X86::VMOVDQU32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
    case X86::VMOVDQU64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
    case X86::VMOVDQU64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
    case X86::VMOVDQU64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
    case X86::VMOVUPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
    case X86::VMOVUPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
    case X86::VMOVUPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
    case X86::VMOVUPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
    case X86::VMOVUPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
    case X86::VMOVUPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
    case X86::VMOVDQA32Z128rmk: Opc = X86::VPBLENDMDZ128rmk; break;
    case X86::VMOVDQA32Z256rmk: Opc = X86::VPBLENDMDZ256rmk; break;
    case X86::VMOVDQA32Zrmk: Opc = X86::VPBLENDMDZrmk; break;
    case X86::VMOVDQA64Z128rmk: Opc = X86::VPBLENDMQZ128rmk; break;
    case X86::VMOVDQA64Z256rmk: Opc = X86::VPBLENDMQZ256rmk; break;
    case X86::VMOVDQA64Zrmk: Opc = X86::VPBLENDMQZrmk; break;
    case X86::VMOVAPDZ128rmk: Opc = X86::VBLENDMPDZ128rmk; break;
    case X86::VMOVAPDZ256rmk: Opc = X86::VBLENDMPDZ256rmk; break;
    case X86::VMOVAPDZrmk: Opc = X86::VBLENDMPDZrmk; break;
    case X86::VMOVAPSZ128rmk: Opc = X86::VBLENDMPSZ128rmk; break;
    case X86::VMOVAPSZ256rmk: Opc = X86::VBLENDMPSZ256rmk; break;
    case X86::VMOVAPSZrmk: Opc = X86::VBLENDMPSZrmk; break;
    }

    NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
              .add(Dest)
              .add(MI.getOperand(2))
              .add(Src)
              .add(MI.getOperand(3))
              .add(MI.getOperand(4))
              .add(MI.getOperand(5))
              .add(MI.getOperand(6))
              .add(MI.getOperand(7));
    break;
  }
  case X86::VMOVDQU8Z128rrk:
  case X86::VMOVDQU8Z256rrk:
  case X86::VMOVDQU8Zrrk:
  case X86::VMOVDQU16Z128rrk:
  case X86::VMOVDQU16Z256rrk:
  case X86::VMOVDQU16Zrrk:
  case X86::VMOVDQU32Z128rrk: case X86::VMOVDQA32Z128rrk:
  case X86::VMOVDQU32Z256rrk: case X86::VMOVDQA32Z256rrk:
  case X86::VMOVDQU32Zrrk: case X86::VMOVDQA32Zrrk:
  case X86::VMOVDQU64Z128rrk: case X86::VMOVDQA64Z128rrk:
  case X86::VMOVDQU64Z256rrk: case X86::VMOVDQA64Z256rrk:
  case X86::VMOVDQU64Zrrk: case X86::VMOVDQA64Zrrk:
  case X86::VMOVUPDZ128rrk: case X86::VMOVAPDZ128rrk:
  case X86::VMOVUPDZ256rrk: case X86::VMOVAPDZ256rrk:
  case X86::VMOVUPDZrrk: case X86::VMOVAPDZrrk:
  case X86::VMOVUPSZ128rrk: case X86::VMOVAPSZ128rrk:
  case X86::VMOVUPSZ256rrk: case X86::VMOVAPSZ256rrk:
  case X86::VMOVUPSZrrk: case X86::VMOVAPSZrrk: {
    unsigned Opc;
    switch (MIOpc) {
    default: llvm_unreachable("Unreachable!");
    case X86::VMOVDQU8Z128rrk: Opc = X86::VPBLENDMBZ128rrk; break;
    case X86::VMOVDQU8Z256rrk: Opc = X86::VPBLENDMBZ256rrk; break;
    case X86::VMOVDQU8Zrrk: Opc = X86::VPBLENDMBZrrk; break;
    case X86::VMOVDQU16Z128rrk: Opc = X86::VPBLENDMWZ128rrk; break;
    case X86::VMOVDQU16Z256rrk: Opc = X86::VPBLENDMWZ256rrk; break;
    case X86::VMOVDQU16Zrrk: Opc = X86::VPBLENDMWZrrk; break;
    case X86::VMOVDQU32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
    case X86::VMOVDQU32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
    case X86::VMOVDQU32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
    case X86::VMOVDQU64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
    case X86::VMOVDQU64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
    case X86::VMOVDQU64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
    case X86::VMOVUPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
    case X86::VMOVUPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
    case X86::VMOVUPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
    case X86::VMOVUPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
    case X86::VMOVUPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
    case X86::VMOVUPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
    case X86::VMOVDQA32Z128rrk: Opc = X86::VPBLENDMDZ128rrk; break;
    case X86::VMOVDQA32Z256rrk: Opc = X86::VPBLENDMDZ256rrk; break;
    case X86::VMOVDQA32Zrrk: Opc = X86::VPBLENDMDZrrk; break;
    case X86::VMOVDQA64Z128rrk: Opc = X86::VPBLENDMQZ128rrk; break;
    case X86::VMOVDQA64Z256rrk: Opc = X86::VPBLENDMQZ256rrk; break;
    case X86::VMOVDQA64Zrrk: Opc = X86::VPBLENDMQZrrk; break;
    case X86::VMOVAPDZ128rrk: Opc = X86::VBLENDMPDZ128rrk; break;
    case X86::VMOVAPDZ256rrk: Opc = X86::VBLENDMPDZ256rrk; break;
    case X86::VMOVAPDZrrk: Opc = X86::VBLENDMPDZrrk; break;
    case X86::VMOVAPSZ128rrk: Opc = X86::VBLENDMPSZ128rrk; break;
    case X86::VMOVAPSZ256rrk: Opc = X86::VBLENDMPSZ256rrk; break;
    case X86::VMOVAPSZrrk: Opc = X86::VBLENDMPSZrrk; break;
    }

    NewMI = BuildMI(MF, MI.getDebugLoc(), get(Opc))
              .add(Dest)
              .add(MI.getOperand(2))
              .add(Src)
              .add(MI.getOperand(3));
    break;
  }
  }

  if (!NewMI) return nullptr;
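The same conversion fires on masked moves that isel created for a masked load intrinsic. A minimal sketch mirroring the test_mask_load_32xi8 update below (register choices are illustrative, taken from the generated checks):

  %res = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %addr, i32 4, <32 x i1> %mask, <32 x i8> %val)

  ; before:                          ; after:
  vmovdqu8 (%rdi), %ymm1 {%k1}       vpblendmb (%rdi), %ymm1, %ymm0 {%k1}
  vmovdqa %ymm1, %ymm0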
@@ -6,17 +6,14 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovb2m %zmm0, %k1
; AVX512BW-NEXT: vmovupd (%rdi), %zmm1 {%k1}
; AVX512BW-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: kshiftrd $16, %k1, %k2
; AVX512BW-NEXT: vmovupd 128(%rdi), %zmm3 {%k2}
; AVX512BW-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2}
; AVX512BW-NEXT: kshiftrw $8, %k1, %k1
; AVX512BW-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
; AVX512BW-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1}
; AVX512BW-NEXT: kshiftrw $8, %k2, %k1
; AVX512BW-NEXT: vmovupd 192(%rdi), %zmm4 {%k1}
; AVX512BW-NEXT: vmovapd %zmm1, %zmm0
; AVX512BW-NEXT: vmovapd %zmm2, %zmm1
; AVX512BW-NEXT: vmovapd %zmm3, %zmm2
; AVX512BW-NEXT: vmovapd %zmm4, %zmm3
; AVX512BW-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1}
; AVX512BW-NEXT: vmovapd %zmm5, %zmm2
; AVX512BW-NEXT: retq
  %res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
  ret <32 x double> %res
@@ -27,17 +24,14 @@ define <32 x i64> @test_load_32i64(<32 x i64>* %ptrs, <32 x i1> %mask, <32 x i64
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: vpsllw $7, %ymm0, %ymm0
; AVX512BW-NEXT: vpmovb2m %zmm0, %k1
; AVX512BW-NEXT: vmovdqu64 (%rdi), %zmm1 {%k1}
; AVX512BW-NEXT: vpblendmq (%rdi), %zmm1, %zmm0 {%k1}
; AVX512BW-NEXT: kshiftrd $16, %k1, %k2
; AVX512BW-NEXT: vmovdqu64 128(%rdi), %zmm3 {%k2}
; AVX512BW-NEXT: vpblendmq 128(%rdi), %zmm3, %zmm5 {%k2}
; AVX512BW-NEXT: kshiftrw $8, %k1, %k1
; AVX512BW-NEXT: vmovdqu64 64(%rdi), %zmm2 {%k1}
; AVX512BW-NEXT: vpblendmq 64(%rdi), %zmm2, %zmm1 {%k1}
; AVX512BW-NEXT: kshiftrw $8, %k2, %k1
; AVX512BW-NEXT: vmovdqu64 192(%rdi), %zmm4 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512BW-NEXT: vmovdqa64 %zmm2, %zmm1
; AVX512BW-NEXT: vmovdqa64 %zmm3, %zmm2
; AVX512BW-NEXT: vmovdqa64 %zmm4, %zmm3
; AVX512BW-NEXT: vpblendmq 192(%rdi), %zmm4, %zmm3 {%k1}
; AVX512BW-NEXT: vmovdqa64 %zmm5, %zmm2
; AVX512BW-NEXT: retq
  %res = call <32 x i64> @llvm.masked.load.v32i64.p0v32i64(<32 x i64>* %ptrs, i32 4, <32 x i1> %mask, <32 x i64> %src0)
  ret <32 x i64> %res
@@ -43,8 +43,7 @@ define <16 x float> @test4(<16 x i32> %trigger, <16 x float>* %addr, <16 x float
; AVX512: ## BB#0:
; AVX512-NEXT: vpxord %zmm2, %zmm2, %zmm2
; AVX512-NEXT: vpcmpeqd %zmm2, %zmm0, %k1
; AVX512-NEXT: vmovups (%rdi), %zmm1 {%k1}
; AVX512-NEXT: vmovaps %zmm1, %zmm0
; AVX512-NEXT: vblendmps (%rdi), %zmm1, %zmm0 {%k1}
; AVX512-NEXT: retq
  %mask = icmp eq <16 x i32> %trigger, zeroinitializer
  %res = call <16 x float> @llvm.masked.load.v16f32.p0v16f32(<16 x float>* %addr, i32 4, <16 x i1>%mask, <16 x float> %dst)
@@ -189,22 +188,18 @@ define <16 x i64> @test_load_16i64(<16 x i64>* %ptrs, <16 x i1> %mask, <16 x i64
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovdqu64 (%rdi), %zmm1 {%k1}
; AVX512F-NEXT: vpblendmq (%rdi), %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: kshiftrw $8, %k1, %k1
; AVX512F-NEXT: vmovdqu64 64(%rdi), %zmm2 {%k1}
; AVX512F-NEXT: vmovdqa64 %zmm1, %zmm0
; AVX512F-NEXT: vmovdqa64 %zmm2, %zmm1
; AVX512F-NEXT: vpblendmq 64(%rdi), %zmm2, %zmm1 {%k1}
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_load_16i64:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vmovdqu64 (%rdi), %zmm1 {%k1}
; SKX-NEXT: vpblendmq (%rdi), %zmm1, %zmm0 {%k1}
; SKX-NEXT: kshiftrw $8, %k1, %k1
; SKX-NEXT: vmovdqu64 64(%rdi), %zmm2 {%k1}
; SKX-NEXT: vmovdqa64 %zmm1, %zmm0
; SKX-NEXT: vmovdqa64 %zmm2, %zmm1
; SKX-NEXT: vpblendmq 64(%rdi), %zmm2, %zmm1 {%k1}
; SKX-NEXT: retq
  %res = call <16 x i64> @llvm.masked.load.v16i64.p0v16i64(<16 x i64>* %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
  ret <16 x i64> %res
@@ -217,22 +212,18 @@ define <16 x double> @test_load_16f64(<16 x double>* %ptrs, <16 x i1> %mask, <16
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
; AVX512F-NEXT: vmovupd (%rdi), %zmm1 {%k1}
; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1}
; AVX512F-NEXT: kshiftrw $8, %k1, %k1
; AVX512F-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
; AVX512F-NEXT: vmovapd %zmm1, %zmm0
; AVX512F-NEXT: vmovapd %zmm2, %zmm1
; AVX512F-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1}
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_load_16f64:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %xmm0, %xmm0
; SKX-NEXT: vpmovb2m %xmm0, %k1
; SKX-NEXT: vmovupd (%rdi), %zmm1 {%k1}
; SKX-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1}
; SKX-NEXT: kshiftrw $8, %k1, %k1
; SKX-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
; SKX-NEXT: vmovapd %zmm1, %zmm0
; SKX-NEXT: vmovapd %zmm2, %zmm1
; SKX-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1}
; SKX-NEXT: retq
  %res = call <16 x double> @llvm.masked.load.v16f64.p0v16f64(<16 x double>* %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
  ret <16 x double> %res
@@ -246,36 +237,30 @@ define <32 x double> @test_load_32f64(<32 x double>* %ptrs, <32 x i1> %mask, <32
; AVX512F-NEXT: vpmovsxbd %xmm5, %zmm5
; AVX512F-NEXT: vpslld $31, %zmm5, %zmm5
; AVX512F-NEXT: vptestmd %zmm5, %zmm5, %k1
; AVX512F-NEXT: vmovupd 128(%rdi), %zmm3 {%k1}
; AVX512F-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k1}
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
; AVX512F-NEXT: vmovupd (%rdi), %zmm1 {%k2}
; AVX512F-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k2}
; AVX512F-NEXT: kshiftrw $8, %k1, %k1
; AVX512F-NEXT: vmovupd 192(%rdi), %zmm4 {%k1}
; AVX512F-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1}
; AVX512F-NEXT: kshiftrw $8, %k2, %k1
; AVX512F-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
; AVX512F-NEXT: vmovapd %zmm1, %zmm0
; AVX512F-NEXT: vmovapd %zmm2, %zmm1
; AVX512F-NEXT: vmovapd %zmm3, %zmm2
; AVX512F-NEXT: vmovapd %zmm4, %zmm3
; AVX512F-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1}
; AVX512F-NEXT: vmovapd %zmm5, %zmm2
; AVX512F-NEXT: retq
;
; SKX-LABEL: test_load_32f64:
; SKX: ## BB#0:
; SKX-NEXT: vpsllw $7, %ymm0, %ymm0
; SKX-NEXT: vpmovb2m %ymm0, %k1
; SKX-NEXT: vmovupd (%rdi), %zmm1 {%k1}
; SKX-NEXT: vblendmpd (%rdi), %zmm1, %zmm0 {%k1}
; SKX-NEXT: kshiftrd $16, %k1, %k2
; SKX-NEXT: vmovupd 128(%rdi), %zmm3 {%k2}
; SKX-NEXT: vblendmpd 128(%rdi), %zmm3, %zmm5 {%k2}
; SKX-NEXT: kshiftrw $8, %k1, %k1
; SKX-NEXT: vmovupd 64(%rdi), %zmm2 {%k1}
; SKX-NEXT: vblendmpd 64(%rdi), %zmm2, %zmm1 {%k1}
; SKX-NEXT: kshiftrw $8, %k2, %k1
; SKX-NEXT: vmovupd 192(%rdi), %zmm4 {%k1}
; SKX-NEXT: vmovapd %zmm1, %zmm0
; SKX-NEXT: vmovapd %zmm2, %zmm1
; SKX-NEXT: vmovapd %zmm3, %zmm2
; SKX-NEXT: vmovapd %zmm4, %zmm3
; SKX-NEXT: vblendmpd 192(%rdi), %zmm4, %zmm3 {%k1}
; SKX-NEXT: vmovapd %zmm5, %zmm2
; SKX-NEXT: retq
  %res = call <32 x double> @llvm.masked.load.v32f64.p0v32f64(<32 x double>* %ptrs, i32 4, <32 x i1> %mask, <32 x double> %src0)
  ret <32 x double> %res
@@ -20,8 +20,7 @@ define <32 x i8> @test_mask_load_32xi8(<32 x i1> %mask, <32 x i8>* %addr, <32 x
; CHECK: ## BB#0:
; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT: vpmovb2m %ymm0, %k1
; CHECK-NEXT: vmovdqu8 (%rdi), %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: vpblendmb (%rdi), %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
  %res = call <32 x i8> @llvm.masked.load.v32i8.p0v32i8(<32 x i8>* %addr, i32 4, <32 x i1>%mask, <32 x i8> %val)
  ret <32 x i8> %res
@@ -33,8 +32,7 @@ define <64 x i8> @test_mask_load_64xi8(<64 x i1> %mask, <64 x i8>* %addr, <64 x
; CHECK: ## BB#0:
; CHECK-NEXT: vpsllw $7, %zmm0, %zmm0
; CHECK-NEXT: vpmovb2m %zmm0, %k1
; CHECK-NEXT: vmovdqu8 (%rdi), %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmb (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <64 x i8> @llvm.masked.load.v64i8.p0v64i8(<64 x i8>* %addr, i32 4, <64 x i1>%mask, <64 x i8> %val)
  ret <64 x i8> %res
@@ -70,8 +68,7 @@ define <32 x i16> @test_mask_load_32xi16(<32 x i1> %mask, <32 x i16>* %addr, <32
; CHECK: ## BB#0:
; CHECK-NEXT: vpsllw $7, %ymm0, %ymm0
; CHECK-NEXT: vpmovb2m %ymm0, %k1
; CHECK-NEXT: vmovdqu16 (%rdi), %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmw (%rdi), %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %res = call <32 x i16> @llvm.masked.load.v32i16.p0v32i16(<32 x i16>* %addr, i32 4, <32 x i1>%mask, <32 x i16> %val)
  ret <32 x i16> %res
@@ -325,13 +325,11 @@ define x86_regcallcc [4 x i32]* @test_CallargRetPointer([4 x i32]* %a) {
}

; X32-LABEL: test_argRet128Vector:
; X32: vmovdqa{{.*}} %xmm0, %xmm1
; X32: vmovdqa{{.*}} %xmm1, %xmm0
; X32: vpblend{{.*}} %xmm0, %xmm1, %xmm0
; X32: ret{{.*}}

; WIN64-LABEL: test_argRet128Vector:
; WIN64: vmovdqa{{.*}} %xmm0, %xmm1
; WIN64: vmovdqa{{.*}} %xmm1, %xmm0
; WIN64: vpblend{{.*}} %xmm0, %xmm1, %xmm0
; WIN64: ret{{.*}}

; Test regcall when receiving/returning 128 bit vector
@@ -360,13 +358,11 @@ define x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i32> %a) {
}

; X32-LABEL: test_argRet256Vector:
; X32: vmovdqa{{.*}} %ymm0, %ymm1
; X32: vmovdqa{{.*}} %ymm1, %ymm0
; X32: vpblend{{.*}} %ymm0, %ymm1, %ymm0
; X32: ret{{.*}}

; WIN64-LABEL: test_argRet256Vector:
; WIN64: vmovdqa{{.*}} %ymm0, %ymm1
; WIN64: vmovdqa{{.*}} %ymm1, %ymm0
; WIN64: vpblend{{.*}} %ymm0, %ymm1, %ymm0
; WIN64: ret{{.*}}

; Test regcall when receiving/returning 256 bit vector
@@ -395,13 +391,11 @@ define x86_regcallcc <8 x i32> @test_CallargRet256Vector(<8 x i32> %a) {
}

; X32-LABEL: test_argRet512Vector:
; X32: vmovdqa{{.*}} %zmm0, %zmm1
; X32: vmovdqa{{.*}} %zmm1, %zmm0
; X32: vpblend{{.*}} %zmm0, %zmm1, %zmm0
; X32: ret{{.*}}

; WIN64-LABEL: test_argRet512Vector:
; WIN64: vmovdqa{{.*}} %zmm0, %zmm1
; WIN64: vmovdqa{{.*}} %zmm1, %zmm0
; WIN64: vpblend{{.*}} %zmm0, %zmm1, %zmm0
; WIN64: ret{{.*}}

; Test regcall when receiving/returning 512 bit vector
@@ -6,8 +6,7 @@ define <16 x float> @test1(<16 x float> %x, <16 x float> %y) nounwind {
; CHECK-LABEL: test1:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpleps %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = fcmp ole <16 x float> %x, %y
  %max = select <16 x i1> %mask, <16 x float> %x, <16 x float> %y
@@ -18,8 +17,7 @@ define <8 x double> @test2(<8 x double> %x, <8 x double> %y) nounwind {
; CHECK-LABEL: test2:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmplepd %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = fcmp ole <8 x double> %x, %y
  %max = select <8 x i1> %mask, <8 x double> %x, <8 x double> %y
@@ -30,8 +28,7 @@ define <16 x i32> @test3(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %yp) nounwin
; CHECK-LABEL: test3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x i32>, <16 x i32>* %yp, align 4
  %mask = icmp eq <16 x i32> %x, %y
@@ -43,8 +40,7 @@ define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1)
; CHECK-LABEL: test4_unsigned:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp uge <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
@@ -55,8 +51,7 @@ define <8 x i64> @test5(<8 x i64> %x, <8 x i64> %y) nounwind {
; CHECK-LABEL: test5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
@@ -67,8 +62,7 @@ define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) noun
; CHECK-LABEL: test6_unsigned:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmq %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ugt <8 x i64> %x, %y
  %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
@@ -87,8 +81,7 @@ define <4 x float> @test7(<4 x float> %a, <4 x float> %b) {
; SKX: ## BB#0:
; SKX-NEXT: vxorps %xmm2, %xmm2, %xmm2
; SKX-NEXT: vcmpltps %xmm2, %xmm0, %k1
; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1}
; SKX-NEXT: vmovaps %xmm1, %xmm0
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq

  %mask = fcmp olt <4 x float> %a, zeroinitializer
@@ -108,8 +101,7 @@ define <2 x double> @test8(<2 x double> %a, <2 x double> %b) {
; SKX: ## BB#0:
; SKX-NEXT: vxorpd %xmm2, %xmm2, %xmm2
; SKX-NEXT: vcmpltpd %xmm2, %xmm0, %k1
; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1}
; SKX-NEXT: vmovapd %xmm1, %xmm0
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
  %mask = fcmp olt <2 x double> %a, zeroinitializer
  %c = select <2 x i1>%mask, <2 x double>%a, <2 x double>%b
@@ -122,15 +114,14 @@ define <8 x i32> @test9(<8 x i32> %x, <8 x i32> %y) nounwind {
; KNL-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; KNL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovdqa %ymm1, %ymm0
; KNL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test9:
; SKX: ## BB#0:
; SKX-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
; SKX-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
; SKX-NEXT: vmovdqa %ymm1, %ymm0
; SKX-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq
  %mask = icmp eq <8 x i32> %x, %y
  %max = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
@@ -143,15 +134,14 @@ define <8 x float> @test10(<8 x float> %x, <8 x float> %y) nounwind {
; KNL-NEXT: ## kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vcmpeqps %zmm1, %zmm0, %k1
; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %ymm1, %ymm0
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test10:
; SKX: ## BB#0:
; SKX-NEXT: vcmpeqps %ymm1, %ymm0, %k1
; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; SKX-NEXT: vmovaps %ymm1, %ymm0
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq

  %mask = fcmp oeq <8 x float> %x, %y
@@ -699,8 +689,7 @@ define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind
; CHECK-LABEL: test16:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k1
; CHECK-NEXT: vmovdqa32 %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmd %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp sge <16 x i32> %x, %y
  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
@@ -711,8 +700,7 @@ define <16 x i32> @test17(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
; CHECK-LABEL: test17:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp sgt <16 x i32> %x, %y
@@ -724,8 +712,7 @@ define <16 x i32> @test18(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
; CHECK-LABEL: test18:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp sle <16 x i32> %x, %y
@@ -737,8 +724,7 @@ define <16 x i32> @test19(<16 x i32> %x, <16 x i32> %x1, <16 x i32>* %y.ptr) nou
; CHECK-LABEL: test19:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
  %mask = icmp ule <16 x i32> %x, %y
@@ -751,8 +737,7 @@ define <16 x i32> @test20(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1, <16 x i3
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpeqd %zmm3, %zmm2, %k1 {%k1}
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp eq <16 x i32> %x1, %y1
  %mask0 = icmp eq <16 x i32> %x, %y
@@ -766,8 +751,7 @@ define <8 x i64> @test21(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1, <8 x i64> %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpleq %zmm2, %zmm3, %k1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: vpblendmq %zmm0, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <8 x i64> %x1, %y1
  %mask0 = icmp sle <8 x i64> %x, %y
@@ -781,8 +765,7 @@ define <8 x i64> @test22(<8 x i64> %x, <8 x i64>* %y.ptr, <8 x i64> %x1, <8 x i6
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtq %zmm2, %zmm1, %k1
; CHECK-NEXT: vpcmpgtq (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sgt <8 x i64> %x1, %y1
  %y = load <8 x i64>, <8 x i64>* %y.ptr, align 4
@@ -797,8 +780,7 @@ define <16 x i32> @test23(<16 x i32> %x, <16 x i32>* %y.ptr, <16 x i32> %x1, <16
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpleud (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <16 x i32> %x1, %y1
  %y = load <16 x i32>, <16 x i32>* %y.ptr, align 4
@@ -812,8 +794,7 @@ define <8 x i64> @test24(<8 x i64> %x, <8 x i64> %x1, i64* %yb.ptr) nounwind {
; CHECK-LABEL: test24:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %yb = load i64, i64* %yb.ptr, align 4
  %y.0 = insertelement <8 x i64> undef, i64 %yb, i32 0
@@ -827,8 +808,7 @@ define <16 x i32> @test25(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1) nounwind
; CHECK-LABEL: test25:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %yb = load i32, i32* %yb.ptr, align 4
  %y.0 = insertelement <16 x i32> undef, i32 %yb, i32 0
@@ -843,8 +823,7 @@ define <16 x i32> @test26(<16 x i32> %x, i32* %yb.ptr, <16 x i32> %x1, <16 x i32
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpled %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <16 x i32> %x1, %y1
  %yb = load i32, i32* %yb.ptr, align 4
@@ -861,8 +840,7 @@ define <8 x i64> @test27(<8 x i64> %x, i64* %yb.ptr, <8 x i64> %x1, <8 x i64> %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleq %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpleq (%rdi){1to8}, %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <8 x i64> %x1, %y1
  %yb = load i64, i64* %yb.ptr, align 4
@@ -932,8 +910,7 @@ define <4 x double> @test30(<4 x double> %x, <4 x double> %y) nounwind {
; SKX-LABEL: test30:
; SKX: ## BB#0:
; SKX-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1}
; SKX-NEXT: vmovapd %ymm1, %ymm0
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq

  %mask = fcmp oeq <4 x double> %x, %y
@@ -951,8 +928,7 @@ define <2 x double> @test31(<2 x double> %x, <2 x double> %x1, <2 x double>* %yp
; SKX-LABEL: test31:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi), %xmm0, %k1
; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1}
; SKX-NEXT: vmovapd %xmm1, %xmm0
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq

  %y = load <2 x double>, <2 x double>* %yp, align 4
@@ -971,8 +947,7 @@ define <4 x double> @test32(<4 x double> %x, <4 x double> %x1, <4 x double>* %yp
; SKX-LABEL: test32:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi), %ymm0, %k1
; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1}
; SKX-NEXT: vmovapd %ymm1, %ymm0
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq

  %y = load <4 x double>, <4 x double>* %yp, align 4
@@ -985,8 +960,7 @@ define <8 x double> @test33(<8 x double> %x, <8 x double> %x1, <8 x double>* %yp
; CHECK-LABEL: test33:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpltpd (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <8 x double>, <8 x double>* %yp, align 4
  %mask = fcmp olt <8 x double> %x, %y
@@ -1004,8 +978,7 @@ define <4 x float> @test34(<4 x float> %x, <4 x float> %x1, <4 x float>* %yp) no
; SKX-LABEL: test34:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi), %xmm0, %k1
; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1}
; SKX-NEXT: vmovaps %xmm1, %xmm0
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
  %y = load <4 x float>, <4 x float>* %yp, align 4
  %mask = fcmp olt <4 x float> %x, %y
@@ -1020,15 +993,14 @@ define <8 x float> @test35(<8 x float> %x, <8 x float> %x1, <8 x float>* %yp) no
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vmovups (%rdi), %ymm2
; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1
; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %ymm1, %ymm0
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test35:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi), %ymm0, %k1
; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; SKX-NEXT: vmovaps %ymm1, %ymm0
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq

  %y = load <8 x float>, <8 x float>* %yp, align 4
@@ -1041,8 +1013,7 @@ define <16 x float> @test36(<16 x float> %x, <16 x float> %x1, <16 x float>* %yp
; CHECK-LABEL: test36:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpltps (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x float>, <16 x float>* %yp, align 4
  %mask = fcmp olt <16 x float> %x, %y
@@ -1054,8 +1025,7 @@ define <8 x double> @test37(<8 x double> %x, <8 x double> %x1, double* %ptr) nou
; CHECK-LABEL: test37:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1
; CHECK-NEXT: vmovapd %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovapd %zmm1, %zmm0
; CHECK-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq

  %a = load double, double* %ptr
@@ -1078,8 +1048,7 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nou
; SKX-LABEL: test38:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi){1to4}, %ymm0, %k1
; SKX-NEXT: vmovapd %ymm0, %ymm1 {%k1}
; SKX-NEXT: vmovapd %ymm1, %ymm0
; SKX-NEXT: vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq

  %a = load double, double* %ptr
@@ -1102,8 +1071,7 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nou
; SKX-LABEL: test39:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltpd (%rdi){1to2}, %xmm0, %k1
; SKX-NEXT: vmovapd %xmm0, %xmm1 {%k1}
; SKX-NEXT: vmovapd %xmm1, %xmm0
; SKX-NEXT: vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq

  %a = load double, double* %ptr
@@ -1120,8 +1088,7 @@ define <16 x float> @test40(<16 x float> %x, <16 x float> %x1, float* %ptr) n
; CHECK-LABEL: test40:
; CHECK: ## BB#0:
; CHECK-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1
; CHECK-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovaps %zmm1, %zmm0
; CHECK-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq

  %a = load float, float* %ptr
@@ -1140,15 +1107,14 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) noun
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vbroadcastss (%rdi), %ymm2
; KNL-NEXT: vcmpltps %zmm2, %zmm0, %k1
; KNL-NEXT: vmovaps %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovaps %ymm1, %ymm0
; KNL-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; SKX-LABEL: test41:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi){1to8}, %ymm0, %k1
; SKX-NEXT: vmovaps %ymm0, %ymm1 {%k1}
; SKX-NEXT: vmovaps %ymm1, %ymm0
; SKX-NEXT: vblendmps %ymm0, %ymm1, %ymm0 {%k1}
; SKX-NEXT: retq

  %a = load float, float* %ptr
@@ -1171,8 +1137,7 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) noun
; SKX-LABEL: test42:
; SKX: ## BB#0:
; SKX-NEXT: vcmpltps (%rdi){1to4}, %xmm0, %k1
; SKX-NEXT: vmovaps %xmm0, %xmm1 {%k1}
; SKX-NEXT: vmovaps %xmm1, %xmm0
; SKX-NEXT: vblendmps %xmm0, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq

  %a = load float, float* %ptr
@@ -1191,8 +1156,7 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x
; KNL-NEXT: vpsllq $63, %zmm2, %zmm2
; KNL-NEXT: vptestmq %zmm2, %zmm2, %k1
; KNL-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
; KNL-NEXT: vmovapd %zmm0, %zmm1 {%k1}
; KNL-NEXT: vmovapd %zmm1, %zmm0
; KNL-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; KNL-NEXT: retq
;
; SKX-LABEL: test43:
@@ -1200,8 +1164,7 @@ define <8 x double> @test43(<8 x double> %x, <8 x double> %x1, double* %ptr,<8 x
; SKX-NEXT: vpsllw $15, %xmm2, %xmm2
; SKX-NEXT: vpmovw2m %xmm2, %k1
; SKX-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 {%k1}
; SKX-NEXT: vmovapd %zmm0, %zmm1 {%k1}
; SKX-NEXT: vmovapd %zmm1, %zmm0
; SKX-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1}
; SKX-NEXT: retq

  %a = load double, double* %ptr
@@ -5,8 +5,7 @@ define <64 x i8> @test1(<64 x i8> %x, <64 x i8> %y) nounwind {
; CHECK-LABEL: test1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <64 x i8> %x, %y
  %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y
@@ -17,8 +16,7 @@ define <64 x i8> @test2(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind {
; CHECK-LABEL: test2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmb %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp sgt <64 x i8> %x, %y
  %max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y
@@ -29,8 +27,7 @@ define <32 x i16> @test3(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1) nounwind
; CHECK-LABEL: test3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %zmm0, %zmm1, %k1
; CHECK-NEXT: vmovdqu16 %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmw %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp sge <32 x i16> %x, %y
  %max = select <32 x i1> %mask, <32 x i16> %x1, <32 x i16> %y
@@ -41,8 +38,7 @@ define <64 x i8> @test4(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind {
; CHECK-LABEL: test4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleub %zmm1, %zmm0, %k1
; CHECK-NEXT: vmovdqu8 %zmm2, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmb %zmm2, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ugt <64 x i8> %x, %y
  %max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y
@@ -53,8 +49,7 @@ define <32 x i16> @test5(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %yp) nounwin
; CHECK-LABEL: test5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <32 x i16>, <32 x i16>* %yp, align 4
  %mask = icmp eq <32 x i16> %x, %y
@@ -66,8 +61,7 @@ define <32 x i16> @test6(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) noun
; CHECK-LABEL: test6:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
  %mask = icmp sgt <32 x i16> %x, %y
@@ -79,8 +73,7 @@ define <32 x i16> @test7(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) noun
; CHECK-LABEL: test7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
  %mask = icmp sle <32 x i16> %x, %y
@@ -92,8 +85,7 @@ define <32 x i16> @test8(<32 x i16> %x, <32 x i16> %x1, <32 x i16>* %y.ptr) noun
; CHECK-LABEL: test8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleuw (%rdi), %zmm0, %k1
; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
  %mask = icmp ule <32 x i16> %x, %y
@@ -106,8 +98,7 @@ define <32 x i16> @test9(<32 x i16> %x, <32 x i16> %y, <32 x i16> %x1, <32 x i16
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpeqw %zmm3, %zmm2, %k1 {%k1}
; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp eq <32 x i16> %x1, %y1
  %mask0 = icmp eq <32 x i16> %x, %y
@@ -121,8 +112,7 @@ define <64 x i8> @test10(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1, <64 x i8> %y
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleb %zmm1, %zmm0, %k1
; CHECK-NEXT: vpcmpleb %zmm2, %zmm3, %k1 {%k1}
; CHECK-NEXT: vmovdqu8 %zmm0, %zmm2 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
; CHECK-NEXT: vpblendmb %zmm0, %zmm2, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <64 x i8> %x1, %y1
  %mask0 = icmp sle <64 x i8> %x, %y
@@ -136,8 +126,7 @@ define <64 x i8> @test11(<64 x i8> %x, <64 x i8>* %y.ptr, <64 x i8> %x1, <64 x i
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %zmm2, %zmm1, %k1
; CHECK-NEXT: vpcmpgtb (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmb %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sgt <64 x i8> %x1, %y1
  %y = load <64 x i8>, <64 x i8>* %y.ptr, align 4
@@ -152,8 +141,7 @@ define <32 x i16> @test12(<32 x i16> %x, <32 x i16>* %y.ptr, <32 x i16> %x1, <32
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %zmm1, %zmm2, %k1
; CHECK-NEXT: vpcmpleuw (%rdi), %zmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqu16 %zmm0, %zmm1 {%k1}
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
; CHECK-NEXT: vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <32 x i16> %x1, %y1
  %y = load <32 x i16>, <32 x i16>* %y.ptr, align 4
@@ -5,8 +5,7 @@ define <32 x i8> @test256_1(<32 x i8> %x, <32 x i8> %y) nounwind {
; CHECK-LABEL: test256_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <32 x i8> %x, %y
  %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %y
@@ -17,8 +16,7 @@ define <32 x i8> @test256_2(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind
; CHECK-LABEL: test256_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp sgt <32 x i8> %x, %y
  %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
@@ -29,8 +27,7 @@ define <16 x i16> @test256_3(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1) nounw
; CHECK-LABEL: test256_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k1
; CHECK-NEXT: vmovdqu16 %ymm2, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: vpblendmw %ymm2, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp sge <16 x i16> %x, %y
  %max = select <16 x i1> %mask, <16 x i16> %x1, <16 x i16> %y
@@ -41,8 +38,7 @@ define <32 x i8> @test256_4(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1) nounwind
; CHECK-LABEL: test256_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k1
; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ugt <32 x i8> %x, %y
  %max = select <32 x i1> %mask, <32 x i8> %x, <32 x i8> %x1
@@ -53,8 +49,7 @@ define <16 x i16> @test256_5(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %yp) nou
; CHECK-LABEL: test256_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw (%rdi), %ymm0, %k1
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x i16>, <16 x i16>* %yp, align 4
  %mask = icmp eq <16 x i16> %x, %y
@@ -66,8 +61,7 @@ define <16 x i16> @test256_6(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr)
; CHECK-LABEL: test256_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw (%rdi), %ymm0, %k1
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
  %mask = icmp sgt <16 x i16> %x, %y
@@ -79,8 +73,7 @@ define <16 x i16> @test256_7(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr)
; CHECK-LABEL: test256_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew (%rdi), %ymm0, %k1
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
  %mask = icmp sle <16 x i16> %x, %y
@@ -92,8 +85,7 @@ define <16 x i16> @test256_8(<16 x i16> %x, <16 x i16> %x1, <16 x i16>* %y.ptr)
; CHECK-LABEL: test256_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleuw (%rdi), %ymm0, %k1
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
  %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
  %mask = icmp ule <16 x i16> %x, %y
@@ -106,8 +98,7 @@ define <16 x i16> @test256_9(<16 x i16> %x, <16 x i16> %y, <16 x i16> %x1, <16 x
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k1
; CHECK-NEXT: vpcmpeqw %ymm3, %ymm2, %k1 {%k1}
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp eq <16 x i16> %x1, %y1
  %mask0 = icmp eq <16 x i16> %x, %y
@@ -121,8 +112,7 @@ define <32 x i8> @test256_10(<32 x i8> %x, <32 x i8> %y, <32 x i8> %x1, <32 x i8
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k1
; CHECK-NEXT: vpcmpleb %ymm2, %ymm3, %k1 {%k1}
; CHECK-NEXT: vmovdqu8 %ymm0, %ymm2 {%k1}
; CHECK-NEXT: vmovdqa %ymm2, %ymm0
; CHECK-NEXT: vpblendmb %ymm0, %ymm2, %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <32 x i8> %x1, %y1
  %mask0 = icmp sle <32 x i8> %x, %y
@@ -136,8 +126,7 @@ define <32 x i8> @test256_11(<32 x i8> %x, <32 x i8>* %y.ptr, <32 x i8> %x1, <32
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %ymm2, %ymm1, %k1
; CHECK-NEXT: vpcmpgtb (%rdi), %ymm0, %k1 {%k1}
; CHECK-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sgt <32 x i8> %x1, %y1
  %y = load <32 x i8>, <32 x i8>* %y.ptr, align 4
@@ -152,8 +141,7 @@ define <16 x i16> @test256_12(<16 x i16> %x, <16 x i16>* %y.ptr, <16 x i16> %x1,
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %ymm1, %ymm2, %k1
; CHECK-NEXT: vpcmpleuw (%rdi), %ymm0, %k1 {%k1}
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <16 x i16> %x1, %y1
  %y = load <16 x i16>, <16 x i16>* %y.ptr, align 4
@@ -167,8 +155,7 @@ define <16 x i8> @test128_1(<16 x i8> %x, <16 x i8> %y) nounwind {
; CHECK-LABEL: test128_1:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp eq <16 x i8> %x, %y
  %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %y
@@ -179,8 +166,7 @@ define <16 x i8> @test128_2(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind
; CHECK-LABEL: test128_2:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp sgt <16 x i8> %x, %y
  %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
@@ -191,8 +177,7 @@ define <8 x i16> @test128_3(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1) nounwind
; CHECK-LABEL: test128_3:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k1
; CHECK-NEXT: vmovdqu16 %xmm2, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vpblendmw %xmm2, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp sge <8 x i16> %x, %y
  %max = select <8 x i1> %mask, <8 x i16> %x1, <8 x i16> %y
@@ -203,8 +188,7 @@ define <16 x i8> @test128_4(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1) nounwind
; CHECK-LABEL: test128_4:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k1
; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask = icmp ugt <16 x i8> %x, %y
  %max = select <16 x i1> %mask, <16 x i8> %x, <16 x i8> %x1
@@ -215,8 +199,7 @@ define <8 x i16> @test128_5(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %yp) nounwin
; CHECK-LABEL: test128_5:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw (%rdi), %xmm0, %k1
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <8 x i16>, <8 x i16>* %yp, align 4
  %mask = icmp eq <8 x i16> %x, %y
@@ -228,8 +211,7 @@ define <8 x i16> @test128_6(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) noun
; CHECK-LABEL: test128_6:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtw (%rdi), %xmm0, %k1
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
  %mask = icmp sgt <8 x i16> %x, %y
@@ -241,8 +223,7 @@ define <8 x i16> @test128_7(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) noun
; CHECK-LABEL: test128_7:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew (%rdi), %xmm0, %k1
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
  %mask = icmp sle <8 x i16> %x, %y
@@ -254,8 +235,7 @@ define <8 x i16> @test128_8(<8 x i16> %x, <8 x i16> %x1, <8 x i16>* %y.ptr) noun
; CHECK-LABEL: test128_8:
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleuw (%rdi), %xmm0, %k1
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
  %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
  %mask = icmp ule <8 x i16> %x, %y
@@ -268,8 +248,7 @@ define <8 x i16> @test128_9(<8 x i16> %x, <8 x i16> %y, <8 x i16> %x1, <8 x i16>
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k1
; CHECK-NEXT: vpcmpeqw %xmm3, %xmm2, %k1 {%k1}
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp eq <8 x i16> %x1, %y1
  %mask0 = icmp eq <8 x i16> %x, %y
@@ -283,8 +262,7 @@ define <16 x i8> @test128_10(<16 x i8> %x, <16 x i8> %y, <16 x i8> %x1, <16 x i8
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k1
; CHECK-NEXT: vpcmpleb %xmm2, %xmm3, %k1 {%k1}
; CHECK-NEXT: vmovdqu8 %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vmovdqa %xmm2, %xmm0
; CHECK-NEXT: vpblendmb %xmm0, %xmm2, %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <16 x i8> %x1, %y1
  %mask0 = icmp sle <16 x i8> %x, %y
@@ -298,8 +276,7 @@ define <16 x i8> @test128_11(<16 x i8> %x, <16 x i8>* %y.ptr, <16 x i8> %x1, <16
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmpgtb %xmm2, %xmm1, %k1
; CHECK-NEXT: vpcmpgtb (%rdi), %xmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sgt <16 x i8> %x1, %y1
  %y = load <16 x i8>, <16 x i8>* %y.ptr, align 4
@@ -314,8 +291,7 @@ define <8 x i16> @test128_12(<8 x i16> %x, <8 x i16>* %y.ptr, <8 x i16> %x1, <8
; CHECK: ## BB#0:
; CHECK-NEXT: vpcmplew %xmm1, %xmm2, %k1
; CHECK-NEXT: vpcmpleuw (%rdi), %xmm0, %k1 {%k1}
; CHECK-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovdqa %xmm1, %xmm0
; CHECK-NEXT: vpblendmw %xmm0, %xmm1, %xmm0 {%k1}
; CHECK-NEXT: retq
  %mask1 = icmp sge <8 x i16> %x1, %y1
  %y = load <8 x i16>, <8 x i16>* %y.ptr, align 4
@@ -5,8 +5,7 @@ define <4 x i64> @test256_1(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; CHECK-LABEL: test256_1:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k1
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %mask = icmp eq <4 x i64> %x, %y
 %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y

@@ -17,8 +16,7 @@ define <4 x i64> @test256_2(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind
 ; CHECK-LABEL: test256_2:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k1
-; CHECK-NEXT: vmovdqa64 %ymm2, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmq %ymm2, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %mask = icmp sgt <4 x i64> %x, %y
 %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y

@@ -29,8 +27,7 @@ define <8 x i32> @test256_3(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1) nounwind
 ; CHECK-LABEL: test256_3:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k1
-; CHECK-NEXT: vmovdqa32 %ymm2, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm2, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %mask = icmp sge <8 x i32> %x, %y
 %max = select <8 x i1> %mask, <8 x i32> %x1, <8 x i32> %y

@@ -41,8 +38,7 @@ define <4 x i64> @test256_4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind
 ; CHECK-LABEL: test256_4:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k1
-; CHECK-NEXT: vmovdqa64 %ymm2, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmq %ymm2, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %mask = icmp ugt <4 x i64> %x, %y
 %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y

@@ -53,8 +49,7 @@ define <8 x i32> @test256_5(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwin
 ; CHECK-LABEL: test256_5:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <8 x i32>, <8 x i32>* %yp, align 4
 %mask = icmp eq <8 x i32> %x, %y

@@ -66,8 +61,7 @@ define <8 x i32> @test256_5b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi
 ; CHECK-LABEL: test256_5b:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpeqd (%rdi), %ymm0, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <8 x i32>, <8 x i32>* %yp, align 4
 %mask = icmp eq <8 x i32> %y, %x

@@ -79,8 +73,7 @@ define <8 x i32> @test256_6(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun
 ; CHECK-LABEL: test256_6:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
 %mask = icmp sgt <8 x i32> %x, %y

@@ -92,8 +85,7 @@ define <8 x i32> @test256_6b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nou
 ; CHECK-LABEL: test256_6b:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpgtd (%rdi), %ymm0, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
 %mask = icmp slt <8 x i32> %y, %x

@@ -105,8 +97,7 @@ define <8 x i32> @test256_7(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun
 ; CHECK-LABEL: test256_7:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpled (%rdi), %ymm0, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
 %mask = icmp sle <8 x i32> %x, %y

@@ -118,8 +109,7 @@ define <8 x i32> @test256_7b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nou
 ; CHECK-LABEL: test256_7b:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpled (%rdi), %ymm0, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
 %mask = icmp sge <8 x i32> %y, %x

@@ -131,8 +121,7 @@ define <8 x i32> @test256_8(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) noun
 ; CHECK-LABEL: test256_8:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
 %mask = icmp ule <8 x i32> %x, %y

@@ -144,8 +133,7 @@ define <8 x i32> @test256_8b(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %y.ptr) nou
 ; CHECK-LABEL: test256_8b:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4
 %mask = icmp uge <8 x i32> %y, %x

@@ -158,8 +146,7 @@ define <8 x i32> @test256_9(<8 x i32> %x, <8 x i32> %y, <8 x i32> %x1, <8 x i32>
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k1
 ; CHECK-NEXT: vpcmpeqd %ymm3, %ymm2, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %mask1 = icmp eq <8 x i32> %x1, %y1
 %mask0 = icmp eq <8 x i32> %x, %y

@@ -173,8 +160,7 @@ define <4 x i64> @test256_10(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1, <4 x i64
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k1
 ; CHECK-NEXT: vpcmpleq %ymm2, %ymm3, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm2 {%k1}
-; CHECK-NEXT: vmovdqa %ymm2, %ymm0
+; CHECK-NEXT: vpblendmq %ymm0, %ymm2, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %mask1 = icmp sge <4 x i64> %x1, %y1
 %mask0 = icmp sle <4 x i64> %x, %y

@@ -188,8 +174,7 @@ define <4 x i64> @test256_11(<4 x i64> %x, <4 x i64>* %y.ptr, <4 x i64> %x1, <4
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpgtq %ymm2, %ymm1, %k1
 ; CHECK-NEXT: vpcmpgtq (%rdi), %ymm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %mask1 = icmp sgt <4 x i64> %x1, %y1
 %y = load <4 x i64>, <4 x i64>* %y.ptr, align 4

@@ -204,8 +189,7 @@ define <8 x i32> @test256_12(<8 x i32> %x, <8 x i32>* %y.ptr, <8 x i32> %x1, <8
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1
 ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %mask1 = icmp sge <8 x i32> %x1, %y1
 %y = load <8 x i32>, <8 x i32>* %y.ptr, align 4

@@ -219,8 +203,7 @@ define <4 x i64> @test256_13(<4 x i64> %x, <4 x i64> %x1, i64* %yb.ptr) nounwind
 ; CHECK-LABEL: test256_13:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k1
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %yb = load i64, i64* %yb.ptr, align 4
 %y.0 = insertelement <4 x i64> undef, i64 %yb, i32 0

@@ -234,8 +217,7 @@ define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind
 ; CHECK-LABEL: test256_14:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpled (%rdi){1to8}, %ymm0, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %yb = load i32, i32* %yb.ptr, align 4
 %y.0 = insertelement <8 x i32> undef, i32 %yb, i32 0

@@ -250,8 +232,7 @@ define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpled %ymm1, %ymm2, %k1
 ; CHECK-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %mask1 = icmp sge <8 x i32> %x1, %y1
 %yb = load i32, i32* %yb.ptr, align 4

@@ -268,8 +249,7 @@ define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpleq %ymm1, %ymm2, %k1
 ; CHECK-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %mask1 = icmp sge <4 x i64> %x1, %y1
 %yb = load i64, i64* %yb.ptr, align 4

@@ -285,8 +265,7 @@ define <8 x i32> @test256_17(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi
 ; CHECK-LABEL: test256_17:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpneqd (%rdi), %ymm0, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <8 x i32>, <8 x i32>* %yp, align 4
 %mask = icmp ne <8 x i32> %x, %y

@@ -298,8 +277,7 @@ define <8 x i32> @test256_18(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi
 ; CHECK-LABEL: test256_18:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpneqd (%rdi), %ymm0, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <8 x i32>, <8 x i32>* %yp, align 4
 %mask = icmp ne <8 x i32> %y, %x

@@ -311,8 +289,7 @@ define <8 x i32> @test256_19(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi
 ; CHECK-LABEL: test256_19:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpnltud (%rdi), %ymm0, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <8 x i32>, <8 x i32>* %yp, align 4
 %mask = icmp uge <8 x i32> %x, %y

@@ -324,8 +301,7 @@ define <8 x i32> @test256_20(<8 x i32> %x, <8 x i32> %x1, <8 x i32>* %yp) nounwi
 ; CHECK-LABEL: test256_20:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpleud (%rdi), %ymm0, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <8 x i32>, <8 x i32>* %yp, align 4
 %mask = icmp uge <8 x i32> %y, %x

@@ -337,8 +313,7 @@ define <2 x i64> @test128_1(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; CHECK-LABEL: test128_1:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k1
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %mask = icmp eq <2 x i64> %x, %y
 %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y

@@ -349,8 +324,7 @@ define <2 x i64> @test128_2(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind
 ; CHECK-LABEL: test128_2:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k1
-; CHECK-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmq %xmm2, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %mask = icmp sgt <2 x i64> %x, %y
 %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y

@@ -361,8 +335,7 @@ define <4 x i32> @test128_3(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1) nounwind
 ; CHECK-LABEL: test128_3:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k1
-; CHECK-NEXT: vmovdqa32 %xmm2, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm2, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %mask = icmp sge <4 x i32> %x, %y
 %max = select <4 x i1> %mask, <4 x i32> %x1, <4 x i32> %y

@@ -373,8 +346,7 @@ define <2 x i64> @test128_4(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind
 ; CHECK-LABEL: test128_4:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k1
-; CHECK-NEXT: vmovdqa64 %xmm2, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmq %xmm2, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %mask = icmp ugt <2 x i64> %x, %y
 %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y

@@ -385,8 +357,7 @@ define <4 x i32> @test128_5(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwin
 ; CHECK-LABEL: test128_5:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <4 x i32>, <4 x i32>* %yp, align 4
 %mask = icmp eq <4 x i32> %x, %y

@@ -398,8 +369,7 @@ define <4 x i32> @test128_5b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %yp) nounwi
 ; CHECK-LABEL: test128_5b:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpeqd (%rdi), %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <4 x i32>, <4 x i32>* %yp, align 4
 %mask = icmp eq <4 x i32> %y, %x

@@ -411,8 +381,7 @@ define <4 x i32> @test128_6(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun
 ; CHECK-LABEL: test128_6:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
 %mask = icmp sgt <4 x i32> %x, %y

@@ -424,8 +393,7 @@ define <4 x i32> @test128_6b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou
 ; CHECK-LABEL: test128_6b:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpgtd (%rdi), %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
 %mask = icmp slt <4 x i32> %y, %x

@@ -437,8 +405,7 @@ define <4 x i32> @test128_7(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun
 ; CHECK-LABEL: test128_7:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpled (%rdi), %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
 %mask = icmp sle <4 x i32> %x, %y

@@ -450,8 +417,7 @@ define <4 x i32> @test128_7b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou
 ; CHECK-LABEL: test128_7b:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpled (%rdi), %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
 %mask = icmp sge <4 x i32> %y, %x

@@ -463,8 +429,7 @@ define <4 x i32> @test128_8(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) noun
 ; CHECK-LABEL: test128_8:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
 %mask = icmp ule <4 x i32> %x, %y

@@ -476,8 +441,7 @@ define <4 x i32> @test128_8b(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou
 ; CHECK-LABEL: test128_8b:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
 %mask = icmp uge <4 x i32> %y, %x

@@ -490,8 +454,7 @@ define <4 x i32> @test128_9(<4 x i32> %x, <4 x i32> %y, <4 x i32> %x1, <4 x i32>
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k1
 ; CHECK-NEXT: vpcmpeqd %xmm3, %xmm2, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %mask1 = icmp eq <4 x i32> %x1, %y1
 %mask0 = icmp eq <4 x i32> %x, %y

@@ -505,8 +468,7 @@ define <2 x i64> @test128_10(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1, <2 x i64
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k1
 ; CHECK-NEXT: vpcmpleq %xmm2, %xmm3, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm2 {%k1}
-; CHECK-NEXT: vmovdqa %xmm2, %xmm0
+; CHECK-NEXT: vpblendmq %xmm0, %xmm2, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %mask1 = icmp sge <2 x i64> %x1, %y1
 %mask0 = icmp sle <2 x i64> %x, %y

@@ -520,8 +482,7 @@ define <2 x i64> @test128_11(<2 x i64> %x, <2 x i64>* %y.ptr, <2 x i64> %x1, <2
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpgtq %xmm2, %xmm1, %k1
 ; CHECK-NEXT: vpcmpgtq (%rdi), %xmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %mask1 = icmp sgt <2 x i64> %x1, %y1
 %y = load <2 x i64>, <2 x i64>* %y.ptr, align 4

@@ -536,8 +497,7 @@ define <4 x i32> @test128_12(<4 x i32> %x, <4 x i32>* %y.ptr, <4 x i32> %x1, <4
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1
 ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %mask1 = icmp sge <4 x i32> %x1, %y1
 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4

@@ -551,8 +511,7 @@ define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind
 ; CHECK-LABEL: test128_13:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k1
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %yb = load i64, i64* %yb.ptr, align 4
 %y.0 = insertelement <2 x i64> undef, i64 %yb, i32 0

@@ -566,8 +525,7 @@ define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind
 ; CHECK-LABEL: test128_14:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpled (%rdi){1to4}, %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %yb = load i32, i32* %yb.ptr, align 4
 %y.0 = insertelement <4 x i32> undef, i32 %yb, i32 0

@@ -582,8 +540,7 @@ define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpled %xmm1, %xmm2, %k1
 ; CHECK-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %mask1 = icmp sge <4 x i32> %x1, %y1
 %yb = load i32, i32* %yb.ptr, align 4

@@ -600,8 +557,7 @@ define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x i64> %x1, <2 x i64
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpleq %xmm1, %xmm2, %k1
 ; CHECK-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k1 {%k1}
-; CHECK-NEXT: vmovdqa64 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %mask1 = icmp sge <2 x i64> %x1, %y1
 %yb = load i64, i64* %yb.ptr, align 4

@@ -617,8 +573,7 @@ define <4 x i32> @test128_17(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou
 ; CHECK-LABEL: test128_17:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpneqd (%rdi), %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
 %mask = icmp ne <4 x i32> %x, %y

@@ -630,8 +585,7 @@ define <4 x i32> @test128_18(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou
 ; CHECK-LABEL: test128_18:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpneqd (%rdi), %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
 %mask = icmp ne <4 x i32> %y, %x

@@ -643,8 +597,7 @@ define <4 x i32> @test128_19(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou
 ; CHECK-LABEL: test128_19:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpnltud (%rdi), %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
 %mask = icmp uge <4 x i32> %x, %y

@@ -656,8 +609,7 @@ define <4 x i32> @test128_20(<4 x i32> %x, <4 x i32> %x1, <4 x i32>* %y.ptr) nou
 ; CHECK-LABEL: test128_20:
 ; CHECK: ## BB#0:
 ; CHECK-NEXT: vpcmpleud (%rdi), %xmm0, %k1
-; CHECK-NEXT: vmovdqa32 %xmm0, %xmm1 {%k1}
-; CHECK-NEXT: vmovdqa %xmm1, %xmm0
+; CHECK-NEXT: vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
 ; CHECK-NEXT: retq
 %y = load <4 x i32>, <4 x i32>* %y.ptr, align 4
 %mask = icmp uge <4 x i32> %y, %x

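Every hunk in the vec-cmp files above has the same shape: a tied masked move plus a register copy collapses into a single untied blend. A minimal standalone reproducer, mirroring test256_1 above (the function name here is illustrative, not from the change):

; With AVX-512VL this select used to compile to a tied masked move plus a copy:
;   vpcmpeqq  %ymm1, %ymm0, %k1
;   vmovdqa64 %ymm0, %ymm1 {%k1}   ; $src0 tied to $dst forces the extra copy
;   vmovdqa   %ymm1, %ymm0
; and now becomes one untied instruction:
;   vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
define <4 x i64> @blendm_select(<4 x i64> %x, <4 x i64> %y) nounwind {
  %mask = icmp eq <4 x i64> %x, %y
  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y
  ret <4 x i64> %max
}
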
@@ -29,8 +29,7 @@ define <2 x double> @test6(<2 x i64> %trigger, <2 x double>* %addr, <2 x double>
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k1
-; SKX-NEXT: vmovupd (%rdi), %xmm1 {%k1}
-; SKX-NEXT: vmovapd %xmm1, %xmm0
+; SKX-NEXT: vblendmpd (%rdi), %xmm1, %xmm0 {%k1}
 ; SKX-NEXT: retq
 %mask = icmp eq <2 x i64> %trigger, zeroinitializer
 %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %addr, i32 4, <2 x i1>%mask, <2 x double>%dst)

@@ -58,8 +57,7 @@ define <4 x float> @test7(<4 x i32> %trigger, <4 x float>* %addr, <4 x float> %d
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
-; SKX-NEXT: vmovups (%rdi), %xmm1 {%k1}
-; SKX-NEXT: vmovaps %xmm1, %xmm0
+; SKX-NEXT: vblendmps (%rdi), %xmm1, %xmm0 {%k1}
 ; SKX-NEXT: retq
 %mask = icmp eq <4 x i32> %trigger, zeroinitializer
 %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1>%mask, <4 x float>%dst)

@@ -95,8 +93,7 @@ define <4 x i32> @test8(<4 x i32> %trigger, <4 x i32>* %addr, <4 x i32> %dst) {
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
-; SKX-NEXT: vmovdqu32 (%rdi), %xmm1 {%k1}
-; SKX-NEXT: vmovdqa %xmm1, %xmm0
+; SKX-NEXT: vpblendmd (%rdi), %xmm1, %xmm0 {%k1}
 ; SKX-NEXT: retq
 %mask = icmp eq <4 x i32> %trigger, zeroinitializer
 %res = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst)

@@ -171,8 +168,7 @@ define <4 x double> @test10(<4 x i32> %trigger, <4 x double>* %addr, <4 x double
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; SKX-NEXT: vpcmpeqd %xmm2, %xmm0, %k1
-; SKX-NEXT: vmovapd (%rdi), %ymm1 {%k1}
-; SKX-NEXT: vmovapd %ymm1, %ymm0
+; SKX-NEXT: vblendmpd (%rdi), %ymm1, %ymm0 {%k1}
 ; SKX-NEXT: retq
 %mask = icmp eq <4 x i32> %trigger, zeroinitializer
 %res = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %addr, i32 32, <4 x i1>%mask, <4 x double>%dst)

@@ -246,16 +242,15 @@ define <8 x float> @test11a(<8 x i32> %trigger, <8 x float>* %addr, <8 x float>
 ; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm0, %k0
 ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
-; AVX512F-NEXT: vmovups (%rdi), %zmm1 {%k1}
-; AVX512F-NEXT: vmovaps %ymm1, %ymm0
+; AVX512F-NEXT: vblendmps (%rdi), %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
 ; AVX512F-NEXT: retq
 ;
 ; SKX-LABEL: test11a:
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpxor %ymm2, %ymm2, %ymm2
 ; SKX-NEXT: vpcmpeqd %ymm2, %ymm0, %k1
-; SKX-NEXT: vmovaps (%rdi), %ymm1 {%k1}
-; SKX-NEXT: vmovaps %ymm1, %ymm0
+; SKX-NEXT: vblendmps (%rdi), %ymm1, %ymm0 {%k1}
 ; SKX-NEXT: retq
 %mask = icmp eq <8 x i32> %trigger, zeroinitializer
 %res = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %addr, i32 32, <8 x i1>%mask, <8 x float>%dst)

@@ -293,16 +288,15 @@ define <8 x i32> @test11b(<8 x i1> %mask, <8 x i32>* %addr, <8 x i32> %dst) {
 ; AVX512F-NEXT: vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT: kshiftlw $8, %k0, %k0
 ; AVX512F-NEXT: kshiftrw $8, %k0, %k1
-; AVX512F-NEXT: vmovdqu32 (%rdi), %zmm1 {%k1}
-; AVX512F-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512F-NEXT: vpblendmd (%rdi), %zmm1, %zmm0 {%k1}
+; AVX512F-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
 ; AVX512F-NEXT: retq
 ;
 ; SKX-LABEL: test11b:
 ; SKX: ## BB#0:
 ; SKX-NEXT: vpsllw $15, %xmm0, %xmm0
 ; SKX-NEXT: vpmovw2m %xmm0, %k1
-; SKX-NEXT: vmovdqu32 (%rdi), %ymm1 {%k1}
-; SKX-NEXT: vmovdqa %ymm1, %ymm0
+; SKX-NEXT: vpblendmd (%rdi), %ymm1, %ymm0 {%k1}
 ; SKX-NEXT: retq
 %res = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %addr, i32 4, <8 x i1>%mask, <8 x i32>%dst)
 ret <8 x i32> %res

@@ -557,8 +551,7 @@ define <2 x float> @test16(<2 x i32> %trigger, <2 x float>* %addr, <2 x float> %
 ; SKX-NEXT: vpcmpeqq %xmm2, %xmm0, %k0
 ; SKX-NEXT: kshiftlw $14, %k0, %k0
 ; SKX-NEXT: kshiftrw $14, %k0, %k1
-; SKX-NEXT: vmovups (%rdi), %xmm1 {%k1}
-; SKX-NEXT: vmovaps %xmm1, %xmm0
+; SKX-NEXT: vblendmps (%rdi), %xmm1, %xmm0 {%k1}
 ; SKX-NEXT: retq
 %mask = icmp eq <2 x i32> %trigger, zeroinitializer
 %res = call <2 x float> @llvm.masked.load.v2f32.p0v2f32(<2 x float>* %addr, i32 4, <2 x i1>%mask, <2 x float>%dst)

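The masked-load hunks above are the intrinsic case mentioned in the commit message: the pass-through operand ties the destination register, so the tied masked load plus copy converts to the memory form of the blend, which also keeps the load folded. A sketch mirroring test7 (function name illustrative):

declare <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>*, i32, <4 x i1>, <4 x float>)

; Old: vmovups (%rdi), %xmm1 {%k1} ; vmovaps %xmm1, %xmm0
; New: vblendmps (%rdi), %xmm1, %xmm0 {%k1}
define <4 x float> @blendm_load(<4 x i32> %trigger, <4 x float>* %addr, <4 x float> %dst) {
  %mask = icmp eq <4 x i32> %trigger, zeroinitializer
  %res = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %addr, i32 4, <4 x i1> %mask, <4 x float> %dst)
  ret <4 x float> %res
}
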
@@ -425,8 +425,7 @@ define <16 x i8> @shuffle_v16i8_00_17_02_19_04_21_06_23_08_25_10_27_12_29_14_31(
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $-21846, %ax # imm = 0xAAAA
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VL-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
 ; AVX512VL-NEXT: retq
 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
 ret <16 x i8> %shuffle

@@ -466,8 +465,7 @@ define <16 x i8> @shuffle_v16i8_00_01_02_19_04_05_06_23_08_09_10_27_12_13_14_31(
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $-30584, %ax # imm = 0x8888
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VL-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
 ; AVX512VL-NEXT: retq
 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
 ret <16 x i8> %shuffle

@@ -526,8 +524,7 @@ define <16 x i8> @shuffle_v16i8_00_01_02_03_20_05_06_23_08_09_10_11_28_13_14_31(
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $-28528, %ax # imm = 0x9090
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vmovdqu8 %xmm0, %xmm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0
+; AVX512VL-NEXT: vpblendmb %xmm0, %xmm1, %xmm0 {%k1}
 ; AVX512VL-NEXT: retq
 %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 5, i32 6, i32 23, i32 8, i32 9, i32 10, i32 11, i32 28, i32 13, i32 14, i32 31>
 ret <16 x i8> %shuffle

@@ -719,8 +719,7 @@ define <16 x i16> @shuffle_v16i16_00_01_02_03_04_05_06_07_08_09_10_11_12_13_14_3
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $-32768, %ax # imm = 0x8000
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VL-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 31>
 ret <16 x i16> %shuffle

@@ -745,8 +744,7 @@ define <16 x i16> @shuffle_v16i16_16_01_02_03_04_05_06_07_08_09_10_11_12_13_14_1
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $1, %ax
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VL-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
 ret <16 x i16> %shuffle

@@ -771,8 +769,7 @@ define <16 x i16> @shuffle_v16i16_00_17_02_19_04_21_06_23_24_09_26_11_28_13_30_1
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $21930, %ax # imm = 0x55AA
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VL-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
 ret <16 x i16> %shuffle

@@ -797,8 +794,7 @@ define <16 x i16> @shuffle_v16i16_16_01_18_03_20_05_22_07_08_25_10_27_12_29_14_3
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movw $-21931, %ax # imm = 0xAA55
 ; AVX512VL-NEXT: kmovw %eax, %k1
-; AVX512VL-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VL-NEXT: vpblendmw %ymm0, %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT: retq
 %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
 ret <16 x i16> %shuffle

@@ -1036,8 +1036,7 @@ define <32 x i8> @shuffle_v32i8_00_33_02_35_04_37_06_39_08_41_10_43_12_45_14_47_
 ; AVX512VL: # BB#0:
 ; AVX512VL-NEXT: movl $-1431655766, %eax # imm = 0xAAAAAAAA
 ; AVX512VL-NEXT: kmovd %eax, %k1
-; AVX512VL-NEXT: vmovdqu8 %ymm0, %ymm1 {%k1}
-; AVX512VL-NEXT: vmovdqa %ymm1, %ymm0
+; AVX512VL-NEXT: vpblendmb %ymm0, %ymm1, %ymm0 {%k1}
 ; AVX512VL-NEXT: retq
 %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 0, i32 33, i32 2, i32 35, i32 4, i32 37, i32 6, i32 39, i32 8, i32 41, i32 10, i32 43, i32 12, i32 45, i32 14, i32 47, i32 16, i32 49, i32 18, i32 51, i32 20, i32 53, i32 22, i32 55, i32 24, i32 57, i32 26, i32 59, i32 28, i32 61, i32 30, i32 63>
 ret <32 x i8> %shuffle

@@ -254,8 +254,7 @@ define <16 x i32> @shuffle_v16i32_0_1_2_19_u_u_u_u_u_u_u_u_u_u_u_u(<16 x i32> %a
 ; ALL: # BB#0:
 ; ALL-NEXT: movw $8, %ax
 ; ALL-NEXT: kmovw %eax, %k1
-; ALL-NEXT: vmovdqa32 %zmm0, %zmm1 {%k1}
-; ALL-NEXT: vmovdqa64 %zmm1, %zmm0
+; ALL-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1}
 ; ALL-NEXT: retq
 %c = shufflevector <16 x i32> %a, <16 x i32> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
 ret <16 x i32> %c

@@ -216,8 +216,7 @@ define <8 x i32> @mask_shuffle_v8i32_23456701(<8 x i32> %a, <8 x i32> %passthru,
 ; CHECK: # BB#0:
 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0]
 ; CHECK-NEXT: kmovb %edi, %k1
-; CHECK-NEXT: vmovdqa32 %ymm0, %ymm1 {%k1}
-; CHECK-NEXT: vmovdqa %ymm1, %ymm0
+; CHECK-NEXT: vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
 ; CHECK-NEXT: retq
 %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
 %mask.cast = bitcast i8 %mask to <8 x i1>
