From 3a13d970aa66ab55707aadfd7b86f32e9139602c Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 14 May 2019 00:39:40 +0000 Subject: [PATCH] [SDAG, x86] allow targets to override test for binop opcodes This follows the pattern of the existing isCommutativeBinOp(). x86 shows improvements from vector narrowing for the min/max opcodes. llvm-svn: 360639 --- llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 14 -------- llvm/include/llvm/CodeGen/TargetLowering.h | 33 +++++++++++++++++++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 ++++----- .../CodeGen/SelectionDAG/TargetLowering.cpp | 3 +- llvm/lib/Target/X86/X86ISelLowering.cpp | 17 ++++++++++ llvm/lib/Target/X86/X86ISelLowering.h | 3 ++ .../CodeGen/X86/vector-reduce-fmax-nnan.ll | 28 ++++++++-------- llvm/test/CodeGen/X86/vector-reduce-fmax.ll | 28 ++++++++-------- .../CodeGen/X86/vector-reduce-fmin-nnan.ll | 28 ++++++++-------- llvm/test/CodeGen/X86/vector-reduce-fmin.ll | 28 ++++++++-------- 10 files changed, 119 insertions(+), 79 deletions(-) diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h index 0ab37bf0cc34..1aeefb17b9df 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h @@ -2590,20 +2590,6 @@ namespace ISD { cast(N)->getAddressingMode() == ISD::UNINDEXED; } - /// Return true if the node is a math/logic binary operator. - inline bool isBinaryOp(const SDNode *N) { - auto Op = N->getOpcode(); - return (Op == ISD::ADD || Op == ISD::SUB || Op == ISD::MUL || - Op == ISD::AND || Op == ISD::OR || Op == ISD::XOR || - Op == ISD::SHL || Op == ISD::SRL || Op == ISD::SRA || - Op == ISD::SDIV || Op == ISD::UDIV || Op == ISD::SREM || - Op == ISD::UREM || Op == ISD::FADD || Op == ISD::FSUB || - Op == ISD::FMUL || Op == ISD::FDIV || Op == ISD::FREM || - Op == ISD::FMINNUM || Op == ISD::FMAXNUM || - Op == ISD::FMINNUM_IEEE || Op == ISD::FMAXNUM_IEEE || - Op == ISD::FMAXIMUM || Op == ISD::FMINIMUM); - } - /// Attempt to match a unary predicate against a scalar/splat constant or /// every element of a constant BUILD_VECTOR. /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match. diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 46a61f44a0e0..2694923af5ff 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -2172,6 +2172,39 @@ public: return false; } + /// Return true if the node is a math/logic binary operator. + virtual bool isBinOp(unsigned Opcode) const { + switch (Opcode) { + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::SHL: + case ISD::SRL: + case ISD::SRA: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FDIV: + case ISD::FREM: + case ISD::FMINNUM: + case ISD::FMAXNUM: + case ISD::FMINNUM_IEEE: + case ISD::FMAXNUM_IEEE: + case ISD::FMAXIMUM: + case ISD::FMINIMUM: + return true; + default: + return false; + } + } + /// Returns true if the opcode is a commutative binary operation. virtual bool isCommutativeBinOp(unsigned Opcode) const { // FIXME: This should get its info from the td file. diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index fcbb9fcd990e..cb96e82baf90 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1961,7 +1961,7 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { } SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) { - assert(ISD::isBinaryOp(BO) && "Unexpected binary operator"); + assert(TLI.isBinOp(BO->getOpcode()) && "Unexpected binary operator"); // Don't do this unless the old select is going away. We want to eliminate the // binary operator, not replace a binop with a select. @@ -16169,14 +16169,14 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT, /// the math/logic after an extract element of a vector. static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG, bool LegalOperations) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue Vec = ExtElt->getOperand(0); SDValue Index = ExtElt->getOperand(1); auto *IndexC = dyn_cast(Index); - if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse()) + if (!IndexC || !TLI.isBinOp(Vec->getOpcode()) || !Vec.hasOneUse()) return SDValue(); // Targets may want to avoid this to prevent an expensive register transfer. - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.shouldScalarizeBinop(Vec)) return SDValue(); @@ -17411,8 +17411,9 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue BinOp = Extract->getOperand(0); - if (!ISD::isBinaryOp(BinOp.getNode())) + if (!TLI.isBinOp(BinOp.getOpcode())) return SDValue(); SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1); @@ -17424,7 +17425,6 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract, bool IsInsert1 = Bop1.getOpcode() == ISD::INSERT_SUBVECTOR && Bop1.getOperand(1).getValueType() == VT && Bop1.getOperand(2) == Index; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); // TODO: We could handle the case where only 1 operand is being inserted by // creating an extract of the other operand, but that requires checking // number of uses and/or costs. @@ -17455,8 +17455,9 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { // We are looking for an optionally bitcasted wide vector binary operator // feeding an extract subvector. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0)); - if (!ISD::isBinaryOp(BinOp.getNode())) + if (!TLI.isBinOp(BinOp.getOpcode())) return SDValue(); // The binop must be a vector type, so we can extract some fraction of it. @@ -17486,7 +17487,6 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) { EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(), WideNumElts / NarrowingRatio); unsigned BOpcode = BinOp.getOpcode(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT)) return SDValue(); @@ -18269,7 +18269,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { int SplatIndex = SVN->getSplatIndex(); if (TLI.isExtractVecEltCheap(VT, SplatIndex) && - ISD::isBinaryOp(N0.getNode())) { + TLI.isBinOp(N0.getOpcode())) { // splat (vector_bo L, R), Index --> // splat (scalar_bo (extelt L, Index), (extelt R, Index)) SDValue L = N0.getOperand(0), R = N0.getOperand(1); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index fe982b1aa967..3f3305f3ab9e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1747,8 +1747,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op, static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG, const APInt &UndefOp0, const APInt &UndefOp1) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); EVT VT = BO.getValueType(); - assert(ISD::isBinaryOp(BO.getNode()) && VT.isVector() && "Vector binop only"); + assert(TLI.isBinOp(BO.getOpcode()) && VT.isVector() && "Vector binop only"); EVT EltVT = VT.getVectorElementType(); unsigned NumElts = VT.getVectorNumElements(); diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 3133ddd2a828..cb19bc2741f8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28333,6 +28333,23 @@ bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const { return true; } +bool X86TargetLowering::isBinOp(unsigned Opcode) const { + switch (Opcode) { + case X86ISD::PMULUDQ: + case X86ISD::FMAX: + case X86ISD::FMIN: + case X86ISD::FMAXC: + case X86ISD::FMINC: + case X86ISD::FAND: + case X86ISD::FANDN: + case X86ISD::FOR: + case X86ISD::FXOR: + return true; + } + + return TargetLoweringBase::isBinOp(Opcode); +} + bool X86TargetLowering::isCommutativeBinOp(unsigned Opcode) const { switch (Opcode) { // TODO: Add more X86ISD opcodes once we have test coverage. diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 9b2f059ae6bb..743980d209fb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -988,6 +988,9 @@ namespace llvm { bool isVectorShiftByScalarCheap(Type *Ty) const override; + /// Add x86-specific opcodes to the default list. + bool isBinOp(unsigned Opcode) const override; + /// Returns true if the opcode is a commutative binary operation. bool isCommutativeBinOp(unsigned Opcode) const override; diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll index 6e8059dfc28e..3fc44b82bc1f 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll @@ -103,9 +103,9 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX-LABEL: test_v8f32: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -114,9 +114,9 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX512-LABEL: test_v8f32: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -155,9 +155,9 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX: # %bb.0: ; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -168,9 +168,9 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -218,7 +218,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX-LABEL: test_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -227,7 +227,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX512-LABEL: test_v4f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -251,7 +251,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX: # %bb.0: ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -262,7 +262,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -292,7 +292,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX-NEXT: vmaxpd %ymm2, %ymm0, %ymm0 ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -304,7 +304,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax.ll index d3c1ca256e87..7582deda0978 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax.ll @@ -103,9 +103,9 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX-LABEL: test_v8f32: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -114,9 +114,9 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX512-LABEL: test_v8f32: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxps %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -155,9 +155,9 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX: # %bb.0: ; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -168,9 +168,9 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vmaxss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -218,7 +218,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX-LABEL: test_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -227,7 +227,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX512-LABEL: test_v4f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -251,7 +251,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX: # %bb.0: ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -262,7 +262,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -292,7 +292,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX-NEXT: vmaxpd %ymm2, %ymm0, %ymm0 ; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -304,7 +304,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll index 8a544685b343..1789738cb8f8 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll @@ -103,9 +103,9 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX-LABEL: test_v8f32: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -114,9 +114,9 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX512-LABEL: test_v8f32: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -155,9 +155,9 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX: # %bb.0: ; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -168,9 +168,9 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -218,7 +218,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX-LABEL: test_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -227,7 +227,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX512-LABEL: test_v4f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -251,7 +251,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX: # %bb.0: ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -262,7 +262,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -292,7 +292,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX-NEXT: vminpd %ymm2, %ymm0, %ymm0 ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -304,7 +304,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin.ll index be193a96bfe4..66129ece4781 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmin.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmin.ll @@ -103,9 +103,9 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX-LABEL: test_v8f32: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -114,9 +114,9 @@ define float @test_v8f32(<8 x float> %a0) { ; AVX512-LABEL: test_v8f32: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminps %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -155,9 +155,9 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX: # %bb.0: ; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -168,9 +168,9 @@ define float @test_v16f32(<16 x float> %a0) { ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] -; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vminps %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; AVX512-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -218,7 +218,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX-LABEL: test_v4f64: ; AVX: # %bb.0: ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -227,7 +227,7 @@ define double @test_v4f64(<4 x double> %a0) { ; AVX512-LABEL: test_v4f64: ; AVX512: # %bb.0: ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminpd %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -251,7 +251,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX: # %bb.0: ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -262,7 +262,7 @@ define double @test_v8f64(<8 x double> %a0) { ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper @@ -292,7 +292,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX-NEXT: vminpd %ymm2, %ymm0, %ymm0 ; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0 +; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: vzeroupper @@ -304,7 +304,7 @@ define double @test_v16f64(<16 x double> %a0) { ; AVX512-NEXT: vextractf64x4 $1, %zmm0, %ymm1 ; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm1 -; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] ; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vzeroupper