forked from OSchip/llvm-project
[X86][XOP] Added VPERMIL2PD/VPERMIL2PS as a target shuffle type
llvm-svn: 271831
This commit is contained in:
parent
163987a235
commit
478295dadd
|
@ -3790,6 +3790,7 @@ static bool isTargetShuffle(unsigned Opcode) {
|
|||
case X86ISD::VPERMILPI:
|
||||
case X86ISD::VPERMILPV:
|
||||
case X86ISD::VPERM2X128:
|
||||
case X86ISD::VPERMIL2:
|
||||
case X86ISD::VPERMI:
|
||||
case X86ISD::VPPERM:
|
||||
case X86ISD::VPERMV:
|
||||
|
@ -4929,6 +4930,20 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
|
|||
case X86ISD::MOVLPS:
|
||||
// Not yet implemented
|
||||
return false;
|
||||
// Shuffle-mask decoding for VPERMIL2 nodes. The mask is only decoded when
// operand 3 is a ConstantSDNode and getTargetShuffleMaskConstant() yields a
// constant for operand 2; in every other situation this case returns false.
case X86ISD::VPERMIL2: {
|
||||
// Flag the shuffle as unary (and fake-unary) when both source operands
// (operands 0 and 1) are the same value.
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
|
||||
// Scalar element width of VT in bits, forwarded to the decoder below.
unsigned MaskEltSize = VT.getScalarSizeInBits();
|
||||
// Operand 2 supplies the selector mask.
SDValue MaskNode = N->getOperand(2);
|
||||
// Operand 3 supplies the control value (read as an immediate below).
SDValue CtrlNode = N->getOperand(3);
|
||||
// The control value must be a constant node to be decodable here.
if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
|
||||
unsigned CtrlImm = CtrlOp->getZExtValue();
|
||||
// The selector mask must also resolve to a constant.
if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
|
||||
// Fill Mask from the constant selector, control immediate and element size.
DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
|
||||
// Decoded: leave the switch (the code that follows the switch is not
// visible in this hunk).
break;
|
||||
}
|
||||
}
|
||||
// Non-constant control or selector mask: report that no mask was decoded.
return false;
|
||||
}
|
||||
case X86ISD::VPPERM: {
|
||||
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
|
||||
SDValue MaskNode = N->getOperand(2);
|
||||
|
@ -30113,6 +30128,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
|
|||
case X86ISD::MOVSD:
|
||||
case X86ISD::VPPERM:
|
||||
case X86ISD::VPERMV3:
|
||||
case X86ISD::VPERMIL2:
|
||||
case X86ISD::VPERMILPI:
|
||||
case X86ISD::VPERMILPV:
|
||||
case X86ISD::VPERM2X128:
|
||||
|
|
|
@ -26,9 +26,7 @@ define <2 x double> @combine_vpermil2pd_identity(<2 x double> %a0, <2 x double>
|
|||
define <4 x double> @combine_vpermil2pd256_identity(<4 x double> %a0, <4 x double> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2pd256_identity:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovapd {{.*#+}} ymm2 = [2,0,2,0]
|
||||
; CHECK-NEXT: vpermil2pd $0, %ymm2, %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: vpermil2pd $0, %ymm2, %ymm0, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vmovaps %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res0 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a1, <4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>, i8 0)
|
||||
%res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %res0, <4 x double> undef, <4 x i64> <i64 2, i64 0, i64 2, i64 0>, i8 0)
|
||||
|
@ -38,9 +36,7 @@ define <4 x double> @combine_vpermil2pd256_identity(<4 x double> %a0, <4 x doubl
|
|||
define <4 x float> @combine_vpermil2ps_identity(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2ps_identity:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [3,2,1,0]
|
||||
; CHECK-NEXT: vpermil2ps $0, %xmm2, %xmm0, %xmm1, %xmm0
|
||||
; CHECK-NEXT: vpermil2ps $0, %xmm2, %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: vmovaps %xmm1, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a1, <4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, i8 0)
|
||||
%res1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %res0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, i8 0)
|
||||
|
@ -50,19 +46,27 @@ define <4 x float> @combine_vpermil2ps_identity(<4 x float> %a0, <4 x float> %a1
|
|||
define <8 x float> @combine_vpermil2ps256_identity(<8 x float> %a0, <8 x float> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2ps256_identity:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [3,2,1,0,1,0,3,2]
|
||||
; CHECK-NEXT: vpermil2ps $0, %ymm2, %ymm0, %ymm1, %ymm0
|
||||
; CHECK-NEXT: vpermil2ps $0, %ymm2, %ymm0, %ymm0, %ymm0
|
||||
; CHECK-NEXT: vmovaps %ymm1, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a1, <8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2>, i8 0)
|
||||
%res1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %res0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2>, i8 0)
|
||||
ret <8 x float> %res1
|
||||
}
|
||||
|
||||
define <8 x float> @combine_vpermil2ps256_zero(<8 x float> %a0, <8 x float> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2ps256_zero:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vxorps %ymm0, %ymm0, %ymm0
|
||||
; CHECK-NEXT: retq
|
||||
%res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a1, <8 x float> %a0, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>, i8 2)
|
||||
ret <8 x float> %res0
|
||||
}
|
||||
|
||||
define <4 x float> @combine_vpermil2ps_blend_with_zero(<4 x float> %a0, <4 x float> %a1) {
|
||||
; CHECK-LABEL: combine_vpermil2ps_blend_with_zero:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpermil2ps {{.*#+}} xmm0 = zero,xmm0[1,2,3]
|
||||
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
|
||||
; CHECK-NEXT: retq
|
||||
%res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 8, i32 1, i32 2, i32 3>, i8 2)
|
||||
ret <4 x float> %res0
|
||||
|
|
Loading…
Reference in New Issue