diff --git a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 106f716aa5cb..772fe1bf1930 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -60,14 +60,38 @@ void X86ATTInstPrinter::printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O) { switch (MI->getOperand(Op).getImm()) { default: llvm_unreachable("Invalid ssecc argument!"); - case 0: O << "eq"; break; - case 1: O << "lt"; break; - case 2: O << "le"; break; - case 3: O << "unord"; break; - case 4: O << "neq"; break; - case 5: O << "nlt"; break; - case 6: O << "nle"; break; - case 7: O << "ord"; break; + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + case 8: O << "eq_uq"; break; + case 9: O << "nge"; break; + case 0xa: O << "ngt"; break; + case 0xb: O << "false"; break; + case 0xc: O << "neq_oq"; break; + case 0xd: O << "ge"; break; + case 0xe: O << "gt"; break; + case 0xf: O << "true"; break; + case 0x10: O << "eq_os"; break; + case 0x11: O << "lt_oq"; break; + case 0x12: O << "le_oq"; break; + case 0x13: O << "unord_s"; break; + case 0x14: O << "neq_us"; break; + case 0x15: O << "nlt_uq"; break; + case 0x16: O << "nle_uq"; break; + case 0x17: O << "ord_s"; break; + case 0x18: O << "eq_us"; break; + case 0x19: O << "nge_uq"; break; + case 0x1a: O << "ngt_uq"; break; + case 0x1b: O << "false_os"; break; + case 0x1c: O << "neq_os"; break; + case 0x1d: O << "ge_oq"; break; + case 0x1e: O << "gt_oq"; break; + case 0x1f: O << "true_us"; break; } } diff --git a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp index 3bbe12733811..7f1f8a3c4cb7 100644 --- a/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp +++ b/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp @@ -50,14 +50,39 @@ void X86IntelInstPrinter::printSSECC(const MCInst *MI, unsigned Op, raw_ostream &O) { switch (MI->getOperand(Op).getImm()) { default: llvm_unreachable("Invalid ssecc argument!"); - case 0: O << "eq"; break; - case 1: O << "lt"; break; - case 2: O << "le"; break; - case 3: O << "unord"; break; - case 4: O << "neq"; break; - case 5: O << "nlt"; break; - case 6: O << "nle"; break; - case 7: O << "ord"; break; + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + case 8: O << "eq_uq"; break; + case 9: O << "nge"; break; + case 0xa: O << "ngt"; break; + case 0xb: O << "false"; break; + case 0xc: O << "neq_oq"; break; + case 0xd: O << "ge"; break; + case 0xe: O << "gt"; break; + case 0xf: O << "true"; break; + case 0x10: O << "eq_os"; break; + case 0x11: O << "lt_oq"; break; + case 0x12: O << "le_oq"; break; + case 0x13: O << "unord_s"; break; + case 0x14: O << "neq_us"; break; + case 0x15: O << "nlt_uq"; break; + case 0x16: O << "nle_uq"; break; + case 0x17: O << "ord_s"; break; + case 0x18: O << "eq_us"; break; + case 0x19: O << "nge_uq"; break; + case 0x1a: O << "ngt_uq"; break; + case 0x1b: O << "false_os"; break; + case 0x1c: O << "neq_os"; break; + case 0x1d: O << "ge_oq"; break; + case 0x1e: O << "gt_oq"; break; + case 0x1f: O << "true_us"; break; + } } diff --git a/llvm/lib/Target/X86/X86AsmPrinter.cpp b/llvm/lib/Target/X86/X86AsmPrinter.cpp index 4c3ff02826b0..5949064ee387 100644 --- a/llvm/lib/Target/X86/X86AsmPrinter.cpp +++ b/llvm/lib/Target/X86/X86AsmPrinter.cpp @@ -266,14 +266,38 @@ void X86AsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op, unsigned char value = MI->getOperand(Op).getImm(); assert(value <= 7 && "Invalid ssecc argument!"); switch (value) { - case 0: O << "eq"; break; - case 1: O << "lt"; break; - case 2: O << "le"; break; - case 3: O << "unord"; break; - case 4: O << "neq"; break; - case 5: O << "nlt"; break; - case 6: O << "nle"; break; - case 7: O << "ord"; break; + case 0: O << "eq"; break; + case 1: O << "lt"; break; + case 2: O << "le"; break; + case 3: O << "unord"; break; + case 4: O << "neq"; break; + case 5: O << "nlt"; break; + case 6: O << "nle"; break; + case 7: O << "ord"; break; + case 8: O << "eq_uq"; break; + case 9: O << "nge"; break; + case 0xa: O << "ngt"; break; + case 0xb: O << "false"; break; + case 0xc: O << "neq_oq"; break; + case 0xd: O << "ge"; break; + case 0xe: O << "gt"; break; + case 0xf: O << "true"; break; + case 0x10: O << "eq_os"; break; + case 0x11: O << "lt_oq"; break; + case 0x12: O << "le_oq"; break; + case 0x13: O << "unord_s"; break; + case 0x14: O << "neq_us"; break; + case 0x15: O << "nlt_uq"; break; + case 0x16: O << "nle_uq"; break; + case 0x17: O << "ord_s"; break; + case 0x18: O << "eq_us"; break; + case 0x19: O << "nge_uq"; break; + case 0x1a: O << "ngt_uq"; break; + case 0x1b: O << "false_os"; break; + case 0x1c: O << "neq_os"; break; + case 0x1d: O << "ge_oq"; break; + case 0x1e: O << "gt_oq"; break; + case 0x1f: O << "true_us"; break; } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7660b953699c..5f4e4183d868 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -14597,41 +14597,42 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG, if (!DCI.isBeforeLegalizeOps()) return SDValue(); - if (!Subtarget->hasAVX()) return SDValue(); + if (!Subtarget->hasAVX()) + return SDValue(); - // Optimize vectors in AVX mode - // Sign extend v8i16 to v8i32 and - // v4i32 to v4i64 - // - // Divide input vector into two parts - // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1} - // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 - // concat the vectors to original VT + // Optimize vectors in AVX mode + // Sign extend v8i16 to v8i32 and + // v4i32 to v4i64 + // + // Divide input vector into two parts + // for v4i32 the shuffle mask will be { 0, 1, -1, -1} {2, 3, -1, -1} + // use vpmovsx instruction to extend v4i32 -> v2i64; v8i16 -> v4i32 + // concat the vectors to original VT EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); EVT OpVT = Op.getValueType(); DebugLoc dl = N->getDebugLoc(); - if (((VT == MVT::v4i64) && (OpVT == MVT::v4i32)) || - ((VT == MVT::v8i32) && (OpVT == MVT::v8i16))) { + if ((VT == MVT::v4i64 && OpVT == MVT::v4i32) || + (VT == MVT::v8i32 && OpVT == MVT::v8i16)) { unsigned NumElems = OpVT.getVectorNumElements(); SmallVector ShufMask1(NumElems, -1); - for (unsigned i=0; i< NumElems/2; i++) ShufMask1[i] = i; + for (unsigned i = 0; i < NumElems/2; i++) ShufMask1[i] = i; SDValue OpLo = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT), - ShufMask1.data()); + ShufMask1.data()); SmallVector ShufMask2(NumElems, -1); - for (unsigned i=0; i< NumElems/2; i++) ShufMask2[i] = i+NumElems/2; + for (unsigned i = 0; i < NumElems/2; i++) ShufMask2[i] = i + NumElems/2; SDValue OpHi = DAG.getVectorShuffle(OpVT, dl, Op, DAG.getUNDEF(OpVT), - ShufMask2.data()); + ShufMask2.data()); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), - VT.getVectorNumElements()/2); - + VT.getVectorNumElements()/2); + OpLo = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpLo); OpHi = DAG.getNode(X86ISD::VSEXT_MOVL, dl, HalfVT, OpHi); diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index c02cae11c693..616601a9438c 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -1766,6 +1766,74 @@ define <8 x float> @test_x86_avx_cmp_ps_256(<8 x float> %a0, <8 x float> %a1) { %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 7) ; <<8 x float>> [#uses=1] ret <8 x float> %res } + +define <8 x float> @test_x86_avx_cmp_ps_256_pseudo_op(<8 x float> %a0, <8 x float> %a1) { + ; CHECK: vcmpeqps + %a2 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a1, i8 0) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpltps + %a3 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a2, i8 1) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpleps + %a4 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a3, i8 2) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpunordps + %a5 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a4, i8 3) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpneqps + %a6 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a5, i8 4) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpnltps + %a7 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a6, i8 5) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpnleps + %a8 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a7, i8 6) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpordps + %a9 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a8, i8 7) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpeq_uqps + %a10 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a9, i8 8) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpngeps + %a11 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a10, i8 9) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpngtps + %a12 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a11, i8 10) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpfalseps + %a13 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a12, i8 11) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpneq_oqps + %a14 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a13, i8 12) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpgeps + %a15 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a14, i8 13) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpgtps + %a16 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a15, i8 14) ; <<8 x float>> [#uses=1] + ; CHECK: vcmptrueps + %a17 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a16, i8 15) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpeq_osps + %a18 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a17, i8 16) ; <<8 x float>> [#uses=1] + ; CHECK: vcmplt_oqps + %a19 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a18, i8 17) ; <<8 x float>> [#uses=1] + ; CHECK: vcmple_oqps + %a20 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a19, i8 18) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpunord_sps + %a21 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a20, i8 19) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpneq_usps + %a22 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a21, i8 20) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpnlt_uqps + %a23 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a22, i8 21) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpnle_uqps + %a24 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a23, i8 22) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpord_sps + %a25 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a24, i8 23) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpeq_usps + %a26 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a25, i8 24) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpnge_uqps + %a27 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a26, i8 25) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpngt_uqps + %a28 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a27, i8 26) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpfalse_osps + %a29 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a28, i8 27) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpneq_osps + %a30 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a29, i8 28) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpge_oqps + %a31 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a30, i8 29) ; <<8 x float>> [#uses=1] + ; CHECK: vcmpgt_oqps + %a32 = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a31, i8 30) ; <<8 x float>> [#uses=1] + ; CHECK: vcmptrue_usps + %res = call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %a0, <8 x float> %a32, i8 31) ; <<8 x float>> [#uses=1] + ret <8 x float> %res +} declare <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone