forked from OSchip/llvm-project
[X86][XOP] Support for VPPERM byte shuffle instruction
This patch begins adding support for lowering to the XOP VPPERM instruction by adding the X86ISD::VPPERM opcode.

Differential Revision: http://reviews.llvm.org/D18189

llvm-svn: 264260
This commit is contained in:
parent
46b9363683
commit
572ca71573
|
@ -21514,6 +21514,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
|||
case X86ISD::VSRAI: return "X86ISD::VSRAI";
|
||||
case X86ISD::VROTLI: return "X86ISD::VROTLI";
|
||||
case X86ISD::VROTRI: return "X86ISD::VROTRI";
|
||||
case X86ISD::VPPERM: return "X86ISD::VPPERM";
|
||||
case X86ISD::CMPP: return "X86ISD::CMPP";
|
||||
case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
|
||||
case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
|
||||
|
|
|
@ -441,6 +441,8 @@ namespace llvm {
|
|||
VPSHA, VPSHL,
|
||||
// XOP signed/unsigned integer comparisons
|
||||
VPCOM, VPCOMU,
|
||||
// XOP packed permute bytes
|
||||
VPPERM,
|
||||
|
||||
// Vector multiply packed unsigned doubleword integers
|
||||
PMULUDQ,
|
||||
|
|
|
@ -251,6 +251,10 @@ def X86vpcomu : SDNode<"X86ISD::VPCOMU",
|
|||
SDTCisSameAs<0,2>,
|
||||
SDTCisVT<3, i8>]>>;
|
||||
|
||||
// XOP VPPERM node: one v16i8 result and three operands, each constrained to
|
||||
// the result type (operand 3 was previously left unconstrained).
|
||||
def X86vpperm : SDNode<"X86ISD::VPPERM",
|
||||
SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
|
||||
SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>>;
|
||||
|
||||
def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>,
|
||||
SDTCisVec<1>,
|
||||
SDTCisSameAs<2, 1>]>;
|
||||
|
|
|
@ -222,8 +222,47 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions
|
|||
defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64>;
|
||||
}
|
||||
|
||||
multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
||||
ValueType vt128> {
|
||||
def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
|
||||
(vt128 VR128:$src3))))]>,
|
||||
XOP_4V, VEX_I8IMM;
|
||||
def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
|
||||
(vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
|
||||
XOP_4V, VEX_I8IMM, VEX_W, MemOp4;
|
||||
// Load folded into $src2; result typed as vt128 (not a fixed v16i8) to match rr/rm.
|
||||
def mr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
|
||||
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[(set VR128:$dst,
|
||||
(vt128 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
|
||||
(vt128 VR128:$src3))))]>,
|
||||
XOP_4V, VEX_I8IMM;
|
||||
// For disassembler
|
||||
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
|
||||
def rr_REV : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
|
||||
[]>, XOP_4V, VEX_I8IMM, VEX_W, MemOp4;
|
||||
}
|
||||
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
defm VPPERM : xop4op<0xA3, "vpperm", X86vpperm, v16i8>;
|
||||
}
|
||||
|
||||
// Instruction where either second or third source can be memory
|
||||
multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
||||
multiclass xop4op_int<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
||||
def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
|
||||
(ins VR128:$src1, VR128:$src2, VR128:$src3),
|
||||
!strconcat(OpcodeStr,
|
||||
|
@ -256,8 +295,7 @@ multiclass xop4op<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
|||
}
|
||||
|
||||
let ExeDomain = SSEPackedInt in {
|
||||
defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>;
|
||||
defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>;
|
||||
defm VPCMOV : xop4op_int<0xA2, "vpcmov", int_x86_xop_vpcmov>;
|
||||
}
|
||||
|
||||
multiclass xop4op256<bits<8> opc, string OpcodeStr, Intrinsic Int> {
|
||||
|
|
|
@ -2278,6 +2278,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
|
|||
X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
|
||||
X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0),
|
||||
X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
|
||||
X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0),
|
||||
X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, X86ISD::VPROT, 0),
|
||||
X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VPROTI, 0),
|
||||
X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, X86ISD::VPROT, 0),
|
||||
|
|
|
@ -21,11 +21,20 @@ define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) {
|
|||
ret <16 x i8> %res1
|
||||
}
|
||||
|
||||
define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
; CHECK-LABEL: combine_vpperm_as_unpckhwd:
|
||||
define <16 x i8> @combine_vpperm_as_unary_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
; CHECK-LABEL: combine_vpperm_as_unary_unpckhwd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
|
||||
ret <16 x i8> %res0
|
||||
}
|
||||
|
||||
define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) {
|
||||
; CHECK-LABEL: combine_vpperm_as_unpckhwd:
|
||||
; CHECK: # BB#0:
|
||||
; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0
|
||||
; CHECK-NEXT: retq
|
||||
%res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> <i8 8, i8 24, i8 9, i8 25, i8 10, i8 26, i8 11, i8 27, i8 12, i8 28, i8 13, i8 29, i8 14, i8 30, i8 15, i8 31>)
|
||||
ret <16 x i8> %res0
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue