From 54d446f70e8ad00a37cf37cce1691b36c23d40bc Mon Sep 17 00:00:00 2001 From: Zi Xuan Wu Date: Wed, 31 Jul 2019 07:03:42 +0000 Subject: [PATCH] revert r367382 because buildbot failure llvm-svn: 367388 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 71 -------- llvm/lib/Target/PowerPC/PPCISelLowering.h | 12 -- llvm/lib/Target/PowerPC/PPCInstrVSX.td | 34 +--- .../CodeGen/PowerPC/build-vector-tests.ll | 62 ++++--- .../PowerPC/load-shuffle-and-shuffle-store.ll | 153 ++++++++++++++---- llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll | 6 +- 6 files changed, 169 insertions(+), 169 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index e1f3ec214449..b1ab405e54f6 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1118,8 +1118,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTargetDAGCombine(ISD::ANY_EXTEND); setTargetDAGCombine(ISD::TRUNCATE); - setTargetDAGCombine(ISD::VECTOR_SHUFFLE); - if (Subtarget.useCRBits()) { setTargetDAGCombine(ISD::TRUNCATE); @@ -1354,8 +1352,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::SExtVElems: return "PPCISD::SExtVElems"; case PPCISD::LXVD2X: return "PPCISD::LXVD2X"; case PPCISD::STXVD2X: return "PPCISD::STXVD2X"; - case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE"; - case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE"; case PPCISD::ST_VSR_SCAL_INT: return "PPCISD::ST_VSR_SCAL_INT"; case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH"; @@ -13117,60 +13113,6 @@ SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N, return Val; } -SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN, - LSBaseSDNode *LSBase, - DAGCombinerInfo &DCI) const { - assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) && - "Not a reverse memop pattern!"); - - auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool { - auto Mask = SVN->getMask(); - int i = 0; - auto I = Mask.rbegin(); - auto E = Mask.rend(); - - for (; I != E; ++I) { - if (*I != i) - return false; - i++; - } - return true; - }; - - SelectionDAG &DAG = DCI.DAG; - EVT VT = SVN->getValueType(0); - - if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX()) - return SDValue(); - - // Before P9, we don't have vector load/store instrs in big-endian - // element order for v8i16 or v16i8 - if (!Subtarget.hasP9Vector() && (VT == MVT::v8i16 || VT == MVT::v16i8)) - return SDValue(); - - if(!IsElementReverse(SVN)) - return SDValue(); - - if (LSBase->getOpcode() == ISD::LOAD) { - SDLoc dl(SVN); - SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()}; - return DAG.getMemIntrinsicNode( - PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps, - LSBase->getMemoryVT(), LSBase->getMemOperand()); - } - - if (LSBase->getOpcode() == ISD::STORE) { - SDLoc dl(LSBase); - SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0), - LSBase->getBasePtr()}; - return DAG.getMemIntrinsicNode( - PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps, - LSBase->getMemoryVT(), LSBase->getMemOperand()); - } - - llvm_unreachable("Expected a load or store node here"); -} - SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -13217,12 +13159,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return combineFPToIntToFP(N, DCI); - case ISD::VECTOR_SHUFFLE: - if (ISD::isNormalLoad(N->getOperand(0).getNode())) { - LSBaseSDNode* LSBase = cast(N->getOperand(0)); - return combineVReverseMemOP(cast(N), LSBase, DCI); - } - break; case ISD::STORE: { EVT Op1VT = N->getOperand(1).getValueType(); @@ -13234,13 +13170,6 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, return Val; } - if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) { - ShuffleVectorSDNode *SVN = cast(N->getOperand(1)); - SDValue Val= combineVReverseMemOP(SVN, cast(N), DCI); - if (Val) - return Val; - } - // Turn STORE (BSWAP) -> sthbrx/stwbrx. if (cast(N)->isUnindexed() && Opcode == ISD::BSWAP && N->getOperand(1).getNode()->hasOneUse() && diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h index 499f8a25b609..ff9423aadee1 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -456,11 +456,6 @@ namespace llvm { /// an xxswapd. LXVD2X, - /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian. - /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on - /// the vector type to load vector in big-endian element order. - LOAD_VEC_BE, - /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a /// v2f32 value into the lower half of a VSR register. LD_VSX_LH, @@ -470,11 +465,6 @@ namespace llvm { /// an xxswapd. STXVD2X, - /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian. - /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on - /// the vector type to store vector in big-endian element order. - STORE_VEC_BE, - /// Store scalar integers from VSR. ST_VSR_SCAL_INT, @@ -1177,8 +1167,6 @@ namespace llvm { SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineABS(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineVSelect(SDNode *N, DAGCombinerInfo &DCI) const; - SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase, - DAGCombinerInfo &DCI) const; /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces /// SETCC with integer subtraction when (1) there is a legal way of doing it diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td index b31cdee388df..b28e18d44bcb 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -78,21 +78,12 @@ def SDTVecConv : SDTypeProfile<1, 2, [ def SDTVabsd : SDTypeProfile<1, 3, [ SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<3, i32> ]>; -def SDT_PPCld_vec_be : SDTypeProfile<1, 1, [ - SDTCisVec<0>, SDTCisPtrTy<1> -]>; -def SDT_PPCst_vec_be : SDTypeProfile<0, 2, [ - SDTCisVec<0>, SDTCisPtrTy<1> -]>; + def PPClxvd2x : SDNode<"PPCISD::LXVD2X", SDT_PPClxvd2x, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCstxvd2x : SDNode<"PPCISD::STXVD2X", SDT_PPCstxvd2x, [SDNPHasChain, SDNPMayStore]>; -def PPCld_vec_be : SDNode<"PPCISD::LOAD_VEC_BE", SDT_PPCld_vec_be, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; -def PPCst_vec_be : SDNode<"PPCISD::STORE_VEC_BE", SDT_PPCst_vec_be, - [SDNPHasChain, SDNPMayStore]>; def PPCxxswapd : SDNode<"PPCISD::XXSWAPD", SDT_PPCxxswapd, [SDNPHasChain]>; def PPCmfvsr : SDNode<"PPCISD::MFVSR", SDTUnaryOp, []>; def PPCmtvsra : SDNode<"PPCISD::MTVSRA", SDTUnaryOp, []>; @@ -1097,19 +1088,6 @@ let Predicates = [HasVSX, HasOnlySwappingMemOps] in { (STXVD2X $rS, xoaddr:$dst)>; def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; } - -// Load vector big endian order -let Predicates = [IsLittleEndian, HasVSX] in { - def : Pat<(v2f64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; - def : Pat<(PPCst_vec_be v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(v4f32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; - def : Pat<(PPCst_vec_be v4f32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; - def : Pat<(v2i64 (PPCld_vec_be xoaddr:$src)), (LXVD2X xoaddr:$src)>; - def : Pat<(PPCst_vec_be v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>; - def : Pat<(v4i32 (PPCld_vec_be xoaddr:$src)), (LXVW4X xoaddr:$src)>; - def : Pat<(PPCst_vec_be v4i32:$rS, xoaddr:$dst), (STXVW4X $rS, xoaddr:$dst)>; -} - let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in { def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>; @@ -3046,16 +3024,6 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in { (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 4))>; def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)), (v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 0))>; - - def : Pat<(v8i16 (PPCld_vec_be xoaddr:$src)), - (COPY_TO_REGCLASS (LXVH8X xoaddr:$src), VRRC)>; - def : Pat<(PPCst_vec_be v8i16:$rS, xoaddr:$dst), - (STXVH8X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; - - def : Pat<(v16i8 (PPCld_vec_be xoaddr:$src)), - (COPY_TO_REGCLASS (LXVB16X xoaddr:$src), VRRC)>; - def : Pat<(PPCst_vec_be v16i8:$rS, xoaddr:$dst), - (STXVB16X (COPY_TO_REGCLASS $rS, VSRC), xoaddr:$dst)>; } // IsLittleEndian, HasP9Vector let Predicates = [IsBigEndian, HasP9Vector] in { diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll index a67fdf3e8f64..adbd7622a80c 100644 --- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll +++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll @@ -986,7 +986,11 @@ define <4 x i32> @fromDiffMemConsDi(i32* nocapture readonly %arr) { ; ; P9LE-LABEL: fromDiffMemConsDi: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: lxvw4x v2, 0, r3 +; P9LE-NEXT: lxv v2, 0(r3) +; P9LE-NEXT: addis r3, r2, .LCPI8_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI8_0@toc@l +; P9LE-NEXT: lxvx v3, 0, r3 +; P9LE-NEXT: vperm v2, v2, v2, v3 ; P9LE-NEXT: blr ; ; P8BE-LABEL: fromDiffMemConsDi: @@ -1000,7 +1004,12 @@ define <4 x i32> @fromDiffMemConsDi(i32* nocapture readonly %arr) { ; ; P8LE-LABEL: fromDiffMemConsDi: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: lxvw4x v2, 0, r3 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: addis r4, r2, .LCPI8_0@toc@ha +; P8LE-NEXT: addi r3, r4, .LCPI8_0@toc@l +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: xxswapd v3, vs0 +; P8LE-NEXT: vperm v2, v3, v3, v2 ; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3 @@ -2561,7 +2570,11 @@ define <4 x i32> @fromDiffMemConsDui(i32* nocapture readonly %arr) { ; ; P9LE-LABEL: fromDiffMemConsDui: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: lxvw4x v2, 0, r3 +; P9LE-NEXT: lxv v2, 0(r3) +; P9LE-NEXT: addis r3, r2, .LCPI41_0@toc@ha +; P9LE-NEXT: addi r3, r3, .LCPI41_0@toc@l +; P9LE-NEXT: lxvx v3, 0, r3 +; P9LE-NEXT: vperm v2, v2, v2, v3 ; P9LE-NEXT: blr ; ; P8BE-LABEL: fromDiffMemConsDui: @@ -2575,7 +2588,12 @@ define <4 x i32> @fromDiffMemConsDui(i32* nocapture readonly %arr) { ; ; P8LE-LABEL: fromDiffMemConsDui: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: lxvw4x v2, 0, r3 +; P8LE-NEXT: lxvd2x vs0, 0, r3 +; P8LE-NEXT: addis r4, r2, .LCPI41_0@toc@ha +; P8LE-NEXT: addi r3, r4, .LCPI41_0@toc@l +; P8LE-NEXT: lvx v2, 0, r3 +; P8LE-NEXT: xxswapd v3, vs0 +; P8LE-NEXT: vperm v2, v3, v3, v2 ; P8LE-NEXT: blr entry: %arrayidx = getelementptr inbounds i32, i32* %arr, i64 3 @@ -4137,8 +4155,8 @@ define <2 x i64> @fromDiffMemConsDll(i64* nocapture readonly %arr) { ; ; P9LE-LABEL: fromDiffMemConsDll: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: addi r3, r3, 16 -; P9LE-NEXT: lxvd2x v2, 0, r3 +; P9LE-NEXT: lxv v2, 16(r3) +; P9LE-NEXT: xxswapd v2, v2 ; P9LE-NEXT: blr ; ; P8BE-LABEL: fromDiffMemConsDll: @@ -4217,8 +4235,9 @@ define <2 x i64> @fromDiffMemVarDll(i64* nocapture readonly %arr, i32 signext %e ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: sldi r4, r4, 3 ; P9LE-NEXT: add r3, r3, r4 -; P9LE-NEXT: addi r3, r3, -8 -; P9LE-NEXT: lxvd2x v2, 0, r3 +; P9LE-NEXT: li r4, -8 +; P9LE-NEXT: lxvx v2, r3, r4 +; P9LE-NEXT: xxswapd v2, v2 ; P9LE-NEXT: blr ; ; P8BE-LABEL: fromDiffMemVarDll: @@ -4929,8 +4948,8 @@ define <2 x i64> @fromDiffMemConsDConvdtoll(double* nocapture readonly %ptr) { ; ; P9LE-LABEL: fromDiffMemConsDConvdtoll: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: addi r3, r3, 16 -; P9LE-NEXT: lxvd2x vs0, 0, r3 +; P9LE-NEXT: lxv vs0, 16(r3) +; P9LE-NEXT: xxswapd vs0, vs0 ; P9LE-NEXT: xvcvdpsxds v2, vs0 ; P9LE-NEXT: blr ; @@ -5021,8 +5040,9 @@ define <2 x i64> @fromDiffMemVarDConvdtoll(double* nocapture readonly %arr, i32 ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: sldi r4, r4, 3 ; P9LE-NEXT: add r3, r3, r4 -; P9LE-NEXT: addi r3, r3, -8 -; P9LE-NEXT: lxvd2x vs0, 0, r3 +; P9LE-NEXT: li r4, -8 +; P9LE-NEXT: lxvx vs0, r3, r4 +; P9LE-NEXT: xxswapd vs0, vs0 ; P9LE-NEXT: xvcvdpsxds v2, vs0 ; P9LE-NEXT: blr ; @@ -5382,8 +5402,8 @@ define <2 x i64> @fromDiffMemConsDull(i64* nocapture readonly %arr) { ; ; P9LE-LABEL: fromDiffMemConsDull: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: addi r3, r3, 16 -; P9LE-NEXT: lxvd2x v2, 0, r3 +; P9LE-NEXT: lxv v2, 16(r3) +; P9LE-NEXT: xxswapd v2, v2 ; P9LE-NEXT: blr ; ; P8BE-LABEL: fromDiffMemConsDull: @@ -5462,8 +5482,9 @@ define <2 x i64> @fromDiffMemVarDull(i64* nocapture readonly %arr, i32 signext % ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: sldi r4, r4, 3 ; P9LE-NEXT: add r3, r3, r4 -; P9LE-NEXT: addi r3, r3, -8 -; P9LE-NEXT: lxvd2x v2, 0, r3 +; P9LE-NEXT: li r4, -8 +; P9LE-NEXT: lxvx v2, r3, r4 +; P9LE-NEXT: xxswapd v2, v2 ; P9LE-NEXT: blr ; ; P8BE-LABEL: fromDiffMemVarDull: @@ -6174,8 +6195,8 @@ define <2 x i64> @fromDiffMemConsDConvdtoull(double* nocapture readonly %ptr) { ; ; P9LE-LABEL: fromDiffMemConsDConvdtoull: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: addi r3, r3, 16 -; P9LE-NEXT: lxvd2x vs0, 0, r3 +; P9LE-NEXT: lxv vs0, 16(r3) +; P9LE-NEXT: xxswapd vs0, vs0 ; P9LE-NEXT: xvcvdpuxds v2, vs0 ; P9LE-NEXT: blr ; @@ -6266,8 +6287,9 @@ define <2 x i64> @fromDiffMemVarDConvdtoull(double* nocapture readonly %arr, i32 ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: sldi r4, r4, 3 ; P9LE-NEXT: add r3, r3, r4 -; P9LE-NEXT: addi r3, r3, -8 -; P9LE-NEXT: lxvd2x vs0, 0, r3 +; P9LE-NEXT: li r4, -8 +; P9LE-NEXT: lxvx vs0, r3, r4 +; P9LE-NEXT: xxswapd vs0, vs0 ; P9LE-NEXT: xvcvdpuxds v2, vs0 ; P9LE-NEXT: blr ; diff --git a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll index dc2ead88f128..ebbce70409c5 100644 --- a/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll +++ b/llvm/test/CodeGen/PowerPC/load-shuffle-and-shuffle-store.ll @@ -19,7 +19,8 @@ define <2 x i64> @load_swap00(<2 x i64>* %vp1, <2 x i64>* %vp2) { ; ; CHECK-P9-LABEL: load_swap00: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: lxvd2x v2, 0, r3 +; CHECK-P9-NEXT: lxv v2, 0(r3) +; CHECK-P9-NEXT: xxswapd v2, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: load_swap00: @@ -47,7 +48,8 @@ define <2 x i64> @load_swap01(<2 x i64>* %vp1, <2 x i64>* %vp2) { ; ; CHECK-P9-LABEL: load_swap01: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: lxvd2x v2, 0, r4 +; CHECK-P9-NEXT: lxv v2, 0(r4) +; CHECK-P9-NEXT: xxswapd v2, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: load_swap01: @@ -70,12 +72,20 @@ define <2 x i64> @load_swap01(<2 x i64>* %vp1, <2 x i64>* %vp2) { define <4 x i32> @load_swap10(<4 x i32>* %vp1, <4 x i32>* %vp2) { ; CHECK-P8-LABEL: load_swap10: ; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: lxvw4x v2, 0, r3 +; CHECK-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha +; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l +; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: load_swap10: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: lxvw4x v2, 0, r3 +; CHECK-P9-NEXT: lxv v2, 0(r3) +; CHECK-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l +; CHECK-P9-NEXT: lxvx v3, 0, r3 +; CHECK-P9-NEXT: vperm v2, v2, v2, v3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: load_swap10: @@ -104,12 +114,20 @@ define <4 x i32> @load_swap10(<4 x i32>* %vp1, <4 x i32>* %vp2) { define <4 x i32> @load_swap11(<4 x i32>* %vp1, <4 x i32>* %vp2) { ; CHECK-P8-LABEL: load_swap11: ; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: load_swap11: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: lxvw4x v2, 0, r4 +; CHECK-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l +; CHECK-P9-NEXT: lxv v2, 0(r4) +; CHECK-P9-NEXT: lxvx v3, 0, r3 +; CHECK-P9-NEXT: vperm v2, v2, v2, v3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: load_swap11: @@ -147,7 +165,11 @@ define <8 x i16> @load_swap20(<8 x i16>* %vp1, <8 x i16>* %vp2){ ; ; CHECK-P9-LABEL: load_swap20: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: lxvh8x v2, 0, r3 +; CHECK-P9-NEXT: lxv v2, 0(r3) +; CHECK-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l +; CHECK-P9-NEXT: lxvx v3, 0, r3 +; CHECK-P9-NEXT: vperm v2, v2, v2, v3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: load_swap20: @@ -185,7 +207,11 @@ define <8 x i16> @load_swap21(<8 x i16>* %vp1, <8 x i16>* %vp2){ ; ; CHECK-P9-LABEL: load_swap21: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: lxvh8x v2, 0, r4 +; CHECK-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l +; CHECK-P9-NEXT: lxv v2, 0(r4) +; CHECK-P9-NEXT: lxvx v3, 0, r3 +; CHECK-P9-NEXT: vperm v2, v2, v2, v3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: load_swap21: @@ -223,7 +249,8 @@ define <16 x i8> @load_swap30(<16 x i8>* %vp1, <16 x i8>* %vp2){ ; ; CHECK-P9-LABEL: load_swap30: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: lxvb16x v2, 0, r3 +; CHECK-P9-NEXT: lxv vs0, 0(r3) +; CHECK-P9-NEXT: xxbrq v2, vs0 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: load_swap30: @@ -258,7 +285,8 @@ define <16 x i8> @load_swap31(<16 x i8>* %vp1, <16 x i8>* %vp2){ ; ; CHECK-P9-LABEL: load_swap31: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: lxvb16x v2, 0, r4 +; CHECK-P9-NEXT: lxv vs0, 0(r4) +; CHECK-P9-NEXT: xxbrq v2, vs0 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: load_swap31: @@ -289,7 +317,8 @@ define <2 x double> @load_swap40(<2 x double>* %vp1, <2 x double>* %vp2) { ; ; CHECK-P9-LABEL: load_swap40: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: lxvd2x v2, 0, r4 +; CHECK-P9-NEXT: lxv vs0, 0(r4) +; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: load_swap40: @@ -312,12 +341,20 @@ define <2 x double> @load_swap40(<2 x double>* %vp1, <2 x double>* %vp2) { define <4 x float> @load_swap50(<4 x float>* %vp1, <4 x float>* %vp2) { ; CHECK-P8-LABEL: load_swap50: ; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: lxvw4x v2, 0, r3 +; CHECK-P8-NEXT: addis r4, r2, .LCPI9_0@toc@ha +; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: addi r4, r4, .LCPI9_0@toc@l +; CHECK-P8-NEXT: lvx v2, 0, r4 +; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: load_swap50: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: lxvw4x v2, 0, r3 +; CHECK-P9-NEXT: lxv v2, 0(r3) +; CHECK-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l +; CHECK-P9-NEXT: lxvx v3, 0, r3 +; CHECK-P9-NEXT: vperm v2, v2, v2, v3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: load_swap50: @@ -346,12 +383,20 @@ define <4 x float> @load_swap50(<4 x float>* %vp1, <4 x float>* %vp2) { define <4 x float> @load_swap51(<4 x float>* %vp1, <4 x float>* %vp2) { ; CHECK-P8-LABEL: load_swap51: ; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: lxvw4x v2, 0, r4 +; CHECK-P8-NEXT: addis r3, r2, .LCPI10_0@toc@ha +; CHECK-P8-NEXT: lvx v3, 0, r4 +; CHECK-P8-NEXT: addi r3, r3, .LCPI10_0@toc@l +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: vperm v2, v3, v3, v2 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: load_swap51: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: lxvw4x v2, 0, r4 +; CHECK-P9-NEXT: addis r3, r2, .LCPI10_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI10_0@toc@l +; CHECK-P9-NEXT: lxv v2, 0(r4) +; CHECK-P9-NEXT: lxvx v3, 0, r3 +; CHECK-P9-NEXT: vperm v2, v2, v2, v3 ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: load_swap51: @@ -385,7 +430,8 @@ define void @swap_store00(<2 x i64> %v1, <2 x i64> %v2, <2 x i64>* %vp) { ; ; CHECK-P9-LABEL: swap_store00: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: stxvd2x v2, 0, r7 +; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: stxv vs0, 0(r7) ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: swap_store00: @@ -412,7 +458,8 @@ define void @swap_store01(<2 x i64> %v1, <2 x i64> %v2, <2 x i64>* %vp) { ; ; CHECK-P9-LABEL: swap_store01: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: stxvd2x v3, 0, r7 +; CHECK-P9-NEXT: xxswapd vs0, v3 +; CHECK-P9-NEXT: stxv vs0, 0(r7) ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: swap_store01: @@ -434,12 +481,20 @@ define void @swap_store01(<2 x i64> %v1, <2 x i64> %v2, <2 x i64>* %vp) { define void @swap_store10(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) { ; CHECK-P8-LABEL: swap_store10: ; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: stxvw4x v2, 0, r7 +; CHECK-P8-NEXT: addis r3, r2, .LCPI13_0@toc@ha +; CHECK-P8-NEXT: addi r3, r3, .LCPI13_0@toc@l +; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: vperm v2, v2, v2, v3 +; CHECK-P8-NEXT: stvx v2, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store10: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: stxvw4x v2, 0, r7 +; CHECK-P9-NEXT: addis r3, r2, .LCPI13_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI13_0@toc@l +; CHECK-P9-NEXT: lxvx v3, 0, r3 +; CHECK-P9-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-NEXT: stxv v2, 0(r7) ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: swap_store10: @@ -467,12 +522,20 @@ define void @swap_store10(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) { define void @swap_store11(<4 x i32> %v1, <4 x i32> %v2, <4 x i32>* %vp) { ; CHECK-P8-LABEL: swap_store11: ; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: stxvw4x v3, 0, r7 +; CHECK-P8-NEXT: addis r3, r2, .LCPI14_0@toc@ha +; CHECK-P8-NEXT: addi r3, r3, .LCPI14_0@toc@l +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: stvx v2, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store11: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: stxvw4x v3, 0, r7 +; CHECK-P9-NEXT: addis r3, r2, .LCPI14_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI14_0@toc@l +; CHECK-P9-NEXT: lxvx v2, 0, r3 +; CHECK-P9-NEXT: vperm v2, v3, v3, v2 +; CHECK-P9-NEXT: stxv v2, 0(r7) ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: swap_store11: @@ -509,7 +572,11 @@ define void @swap_store20(<8 x i16> %v1, <8 x i16> %v2, <8 x i16>* %vp) { ; ; CHECK-P9-LABEL: swap_store20: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: stxvh8x v2, 0, r7 +; CHECK-P9-NEXT: addis r3, r2, .LCPI15_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI15_0@toc@l +; CHECK-P9-NEXT: lxvx v3, 0, r3 +; CHECK-P9-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-NEXT: stxv v2, 0(r7) ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: swap_store20: @@ -546,7 +613,11 @@ define void @swap_store21(<8 x i16> %v1, <8 x i16> %v2, <8 x i16>* %vp) { ; ; CHECK-P9-LABEL: swap_store21: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: stxvh8x v3, 0, r7 +; CHECK-P9-NEXT: addis r3, r2, .LCPI16_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI16_0@toc@l +; CHECK-P9-NEXT: lxvx v2, 0, r3 +; CHECK-P9-NEXT: vperm v2, v3, v3, v2 +; CHECK-P9-NEXT: stxv v2, 0(r7) ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: swap_store21: @@ -583,7 +654,8 @@ define void @swap_store30(<16 x i8> %v1, <16 x i8> %v2, <16 x i8>* %vp) { ; ; CHECK-P9-LABEL: swap_store30: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: stxvb16x v2, 0, r7 +; CHECK-P9-NEXT: xxbrq vs0, v2 +; CHECK-P9-NEXT: stxv vs0, 0(r7) ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: swap_store30: @@ -617,7 +689,8 @@ define void @swap_store31(<16 x i8> %v1, <16 x i8> %v2, <16 x i8>* %vp) { ; ; CHECK-P9-LABEL: swap_store31: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: stxvb16x v3, 0, r7 +; CHECK-P9-NEXT: xxbrq vs0, v3 +; CHECK-P9-NEXT: stxv vs0, 0(r7) ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: swap_store31: @@ -647,7 +720,8 @@ define void @swap_store40(<2 x double> %v1, <2 x double> %v2, <2 x double>* %vp) ; ; CHECK-P9-LABEL: swap_store40: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: stxvd2x v2, 0, r7 +; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: stxv vs0, 0(r7) ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: swap_store40: @@ -674,7 +748,8 @@ define void @swap_store41(<2 x double> %v1, <2 x double> %v2, <2 x double>* %vp) ; ; CHECK-P9-LABEL: swap_store41: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: stxvd2x v3, 0, r7 +; CHECK-P9-NEXT: xxswapd vs0, v3 +; CHECK-P9-NEXT: stxv vs0, 0(r7) ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: swap_store41: @@ -696,12 +771,20 @@ define void @swap_store41(<2 x double> %v1, <2 x double> %v2, <2 x double>* %vp) define void @swap_store50(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) { ; CHECK-P8-LABEL: swap_store50: ; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: stxvw4x v2, 0, r7 +; CHECK-P8-NEXT: addis r3, r2, .LCPI21_0@toc@ha +; CHECK-P8-NEXT: addi r3, r3, .LCPI21_0@toc@l +; CHECK-P8-NEXT: lvx v3, 0, r3 +; CHECK-P8-NEXT: vperm v2, v2, v2, v3 +; CHECK-P8-NEXT: stvx v2, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store50: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: stxvw4x v2, 0, r7 +; CHECK-P9-NEXT: addis r3, r2, .LCPI21_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI21_0@toc@l +; CHECK-P9-NEXT: lxvx v3, 0, r3 +; CHECK-P9-NEXT: vperm v2, v2, v2, v3 +; CHECK-P9-NEXT: stxv v2, 0(r7) ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: swap_store50: @@ -729,12 +812,20 @@ define void @swap_store50(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) { define void @swap_store51(<4 x float> %v1, <4 x float> %v2, <4 x float>* %vp) { ; CHECK-P8-LABEL: swap_store51: ; CHECK-P8: # %bb.0: -; CHECK-P8-NEXT: stxvw4x v3, 0, r7 +; CHECK-P8-NEXT: addis r3, r2, .LCPI22_0@toc@ha +; CHECK-P8-NEXT: addi r3, r3, .LCPI22_0@toc@l +; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: vperm v2, v3, v3, v2 +; CHECK-P8-NEXT: stvx v2, 0, r7 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: swap_store51: ; CHECK-P9: # %bb.0: -; CHECK-P9-NEXT: stxvw4x v3, 0, r7 +; CHECK-P9-NEXT: addis r3, r2, .LCPI22_0@toc@ha +; CHECK-P9-NEXT: addi r3, r3, .LCPI22_0@toc@l +; CHECK-P9-NEXT: lxvx v2, 0, r3 +; CHECK-P9-NEXT: vperm v2, v3, v3, v2 +; CHECK-P9-NEXT: stxv v2, 0(r7) ; CHECK-P9-NEXT: blr ; ; CHECK-P8-BE-LABEL: swap_store51: diff --git a/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll b/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll index c2b886d6055a..cfe201999282 100644 --- a/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll +++ b/llvm/test/CodeGen/PowerPC/vsx_shuffle_le.ll @@ -85,7 +85,8 @@ define <2 x double> @test10(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK: lxvd2x 34, 0, 3 ; CHECK-P9-LABEL: @test10 -; CHECK-P9: lxvd2x 34, 0, 3 +; CHECK-P9: lxv 0, 0(3) +; CHECK-P9: xxswapd 34, 0 } define <2 x double> @test11(<2 x double>* %p1, <2 x double>* %p2) { @@ -256,7 +257,8 @@ define <2 x double> @test32(<2 x double>* %p1, <2 x double>* %p2) { ; CHECK: lxvd2x 34, 0, 4 ; CHECK-P9-LABEL: @test32 -; CHECK-P9: lxvd2x 34, 0, 4 +; CHECK-P9: lxv 0, 0(4) +; CHECK-P9: xxswapd 34, 0 } define <2 x double> @test33(<2 x double>* %p1, <2 x double>* %p2) {