diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 774a253cf93d..9ed1e077f496 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21514,6 +21514,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::VSRAI: return "X86ISD::VSRAI"; case X86ISD::VROTLI: return "X86ISD::VROTLI"; case X86ISD::VROTRI: return "X86ISD::VROTRI"; + case X86ISD::VPPERM: return "X86ISD::VPPERM"; case X86ISD::CMPP: return "X86ISD::CMPP"; case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ"; case X86ISD::PCMPGT: return "X86ISD::PCMPGT"; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 94e2f54d6b28..cf42eb824ce7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -441,6 +441,8 @@ namespace llvm { VPSHA, VPSHL, // XOP signed/unsigned integer comparisons VPCOM, VPCOMU, + // XOP packed permute bytes + VPPERM, // Vector multiply packed unsigned doubleword integers PMULUDQ, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index 260e846c7c74..a5adfd35f0c0 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -251,6 +251,10 @@ def X86vpcomu : SDNode<"X86ISD::VPCOMU", SDTCisSameAs<0,2>, SDTCisVT<3, i8>]>>; +def X86vpperm : SDNode<"X86ISD::VPPERM", + SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>>; + def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; diff --git a/llvm/lib/Target/X86/X86InstrXOP.td b/llvm/lib/Target/X86/X86InstrXOP.td index 63082919d5a5..cc3ec2236229 100644 --- a/llvm/lib/Target/X86/X86InstrXOP.td +++ b/llvm/lib/Target/X86/X86InstrXOP.td @@ -222,8 +222,47 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions defm VPCOMUQ : xopvpcom<0xEF, "uq", X86vpcomu, v2i64>; } +multiclass xop4op opc, string OpcodeStr, SDNode OpNode, + ValueType vt128> { + def rr : IXOPi8, + XOP_4V, VEX_I8IMM; + def rm : IXOPi8, + XOP_4V, VEX_I8IMM, VEX_W, MemOp4; + def mr : IXOPi8, + XOP_4V, VEX_I8IMM; + // For disassembler + let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in + def rr_REV : IXOPi8, XOP_4V, VEX_I8IMM, VEX_W, MemOp4; +} + +let ExeDomain = SSEPackedInt in { + defm VPPERM : xop4op<0xA3, "vpperm", X86vpperm, v16i8>; +} + // Instruction where either second or third source can be memory -multiclass xop4op opc, string OpcodeStr, Intrinsic Int> { +multiclass xop4op_int opc, string OpcodeStr, Intrinsic Int> { def rr : IXOPi8 opc, string OpcodeStr, Intrinsic Int> { } let ExeDomain = SSEPackedInt in { - defm VPPERM : xop4op<0xA3, "vpperm", int_x86_xop_vpperm>; - defm VPCMOV : xop4op<0xA2, "vpcmov", int_x86_xop_vpcmov>; + defm VPCMOV : xop4op_int<0xA2, "vpcmov", int_x86_xop_vpcmov>; } multiclass xop4op256 opc, string OpcodeStr, Intrinsic Int> { diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 65aea3dfe15f..1c8ec14a37fa 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -2278,6 +2278,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(xop_vpcomuq, INTR_TYPE_3OP, X86ISD::VPCOMU, 0), X86_INTRINSIC_DATA(xop_vpcomuw, INTR_TYPE_3OP, X86ISD::VPCOMU, 0), X86_INTRINSIC_DATA(xop_vpcomw, INTR_TYPE_3OP, X86ISD::VPCOM, 0), + X86_INTRINSIC_DATA(xop_vpperm, INTR_TYPE_3OP, X86ISD::VPPERM, 0), X86_INTRINSIC_DATA(xop_vprotb, INTR_TYPE_2OP, X86ISD::VPROT, 0), X86_INTRINSIC_DATA(xop_vprotbi, INTR_TYPE_2OP, X86ISD::VPROTI, 0), X86_INTRINSIC_DATA(xop_vprotd, INTR_TYPE_2OP, X86ISD::VPROT, 0), diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll index 6d7439b73565..00294cf2ef63 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll @@ -21,11 +21,20 @@ define <16 x i8> @combine_vpperm_identity(<16 x i8> %a0, <16 x i8> %a1) { ret <16 x i8> %res1 } -define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) { -; CHECK-LABEL: combine_vpperm_as_unpckhwd: +define <16 x i8> @combine_vpperm_as_unary_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) { +; CHECK-LABEL: combine_vpperm_as_unary_unpckhwd: ; CHECK: # BB#0: ; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm0, %xmm0, %xmm0 ; CHECK-NEXT: retq %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a0, <16 x i8> ) ret <16 x i8> %res0 } + +define <16 x i8> @combine_vpperm_as_unpckhwd(<16 x i8> %a0, <16 x i8> %a1) { +; CHECK-LABEL: combine_vpperm_as_unpckhwd: +; CHECK: # BB#0: +; CHECK-NEXT: vpperm {{.*}}(%rip), %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res0 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> ) + ret <16 x i8> %res0 +}