From 39522b1e10428e4fa79a9d2dda20cbea7a1168e0 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Tue, 25 Aug 2020 14:41:53 +0100 Subject: [PATCH] [SelectionDAG] Legalize intrinsic get.active.lane.mask This adapts the legalization of the get.active.lane.mask intrinsic to the new semantics described in D86147. Because the second argument is now the loop trip count rather than the backedge-taken count, we legalize this intrinsic to an 'icmp ULT' instead of the 'icmp ULE' that was used previously. Differential Revision: https://reviews.llvm.org/D86302 --- .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 10 +++++----- .../LowOverheadLoops/tail-pred-intrinsic-round.ll | 2 +- llvm/test/CodeGen/Thumb2/active_lane_mask.ll | 14 +++++++------- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 9e57fa084ad8..38d11e4cd059 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6890,16 +6890,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, case Intrinsic::get_active_lane_mask: { auto DL = getCurSDLoc(); SDValue Index = getValue(I.getOperand(0)); - SDValue BTC = getValue(I.getOperand(1)); + SDValue TripCount = getValue(I.getOperand(1)); Type *ElementTy = I.getOperand(0)->getType(); EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); unsigned VecWidth = VT.getVectorNumElements(); - SmallVector OpsBTC; + SmallVector OpsTripCount; SmallVector OpsIndex; SmallVector OpsStepConstants; for (unsigned i = 0; i < VecWidth; i++) { - OpsBTC.push_back(BTC); + OpsTripCount.push_back(TripCount); OpsIndex.push_back(Index); OpsStepConstants.push_back(DAG.getConstant(i, DL, MVT::getVT(ElementTy))); } @@ -6912,9 +6912,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, SDValue VectorStep = DAG.getBuildVector(VecTy, DL, OpsStepConstants); SDValue VectorInduction = DAG.getNode( ISD::UADDO, 
DL, DAG.getVTList(VecTy, CCVT), VectorIndex, VectorStep); - SDValue VectorBTC = DAG.getBuildVector(VecTy, DL, OpsBTC); + SDValue VectorTripCount = DAG.getBuildVector(VecTy, DL, OpsTripCount); SDValue SetCC = DAG.getSetCC(DL, CCVT, VectorInduction.getValue(0), - VectorBTC, ISD::CondCode::SETULE); + VectorTripCount, ISD::CondCode::SETULT); setValue(&I, DAG.getNode(ISD::AND, DL, CCVT, DAG.getNOT(DL, VectorInduction.getValue(1), CCVT), SetCC)); diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll index d29c39a82124..b6076eb28ff4 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll @@ -253,7 +253,7 @@ define arm_aapcs_vfpcc void @nearbyint(float* noalias nocapture readonly %pSrcA, ; CHECK-NEXT: add.w r12, r12, #4 ; CHECK-NEXT: vpnot ; CHECK-NEXT: vpstt -; CHECK-NEXT: vcmpt.u32 cs, q1, q2 +; CHECK-NEXT: vcmpt.u32 hi, q1, q2 ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16 ; CHECK-NEXT: vrintr.f32 s15, s11 ; CHECK-NEXT: vrintr.f32 s14, s10 diff --git a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll index e9dfccd320da..7696e6645195 100644 --- a/llvm/test/CodeGen/Thumb2/active_lane_mask.ll +++ b/llvm/test/CodeGen/Thumb2/active_lane_mask.ll @@ -13,7 +13,7 @@ define <4 x i32> @v4i32(i32 %index, i32 %BTC, <4 x i32> %V1, <4 x i32> %V2) { ; CHECK-NEXT: vdup.32 q1, r1 ; CHECK-NEXT: vpnot ; CHECK-NEXT: vpst -; CHECK-NEXT: vcmpt.u32 cs, q1, q0 +; CHECK-NEXT: vcmpt.u32 hi, q1, q0 ; CHECK-NEXT: vmov d0, r2, r3 ; CHECK-NEXT: vldr d1, [sp] ; CHECK-NEXT: vldrw.u32 q1, [r0] @@ -43,7 +43,7 @@ define <8 x i16> @v8i16(i32 %index, i32 %BTC, <8 x i16> %V1, <8 x i16> %V2) { ; CHECK-NEXT: vmov.i8 q1, #0x0 ; CHECK-NEXT: vmov.i8 q2, #0xff ; CHECK-NEXT: vadd.i32 q3, q0, r0 -; CHECK-NEXT: vcmp.u32 cs, q5, q3 +; CHECK-NEXT: vcmp.u32 hi, q5, q3 ; CHECK-NEXT: 
vpsel q4, q2, q1 ; CHECK-NEXT: vmov r1, s16 ; CHECK-NEXT: vmov.16 q0[0], r1 @@ -56,7 +56,7 @@ define <8 x i16> @v8i16(i32 %index, i32 %BTC, <8 x i16> %V1, <8 x i16> %V2) { ; CHECK-NEXT: adr r1, .LCPI1_1 ; CHECK-NEXT: vldrw.u32 q4, [r1] ; CHECK-NEXT: vadd.i32 q4, q4, r0 -; CHECK-NEXT: vcmp.u32 cs, q5, q4 +; CHECK-NEXT: vcmp.u32 hi, q5, q4 ; CHECK-NEXT: vpsel q5, q2, q1 ; CHECK-NEXT: vmov r1, s20 ; CHECK-NEXT: vmov.16 q0[4], r1 @@ -128,7 +128,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) { ; CHECK-NEXT: vmov.i8 q5, #0x0 ; CHECK-NEXT: vmov.i8 q4, #0xff ; CHECK-NEXT: vadd.i32 q1, q0, r0 -; CHECK-NEXT: vcmp.u32 cs, q7, q1 +; CHECK-NEXT: vcmp.u32 hi, q7, q1 ; CHECK-NEXT: vpsel q0, q4, q5 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: vmov.16 q2[0], r1 @@ -141,7 +141,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) { ; CHECK-NEXT: adr r1, .LCPI2_1 ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vadd.i32 q3, q0, r0 -; CHECK-NEXT: vcmp.u32 cs, q7, q3 +; CHECK-NEXT: vcmp.u32 hi, q7, q3 ; CHECK-NEXT: vpsel q0, q4, q5 ; CHECK-NEXT: vmov r1, s0 ; CHECK-NEXT: vmov.16 q2[4], r1 @@ -172,7 +172,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) { ; CHECK-NEXT: adr r1, .LCPI2_2 ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vadd.i32 q0, q0, r0 -; CHECK-NEXT: vcmp.u32 cs, q7, q0 +; CHECK-NEXT: vcmp.u32 hi, q7, q0 ; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill ; CHECK-NEXT: vpsel q6, q4, q5 ; CHECK-NEXT: vmov r1, s24 @@ -186,7 +186,7 @@ define <16 x i8> @v16i8(i32 %index, i32 %BTC, <16 x i8> %V1, <16 x i8> %V2) { ; CHECK-NEXT: adr r1, .LCPI2_3 ; CHECK-NEXT: vldrw.u32 q6, [r1] ; CHECK-NEXT: vadd.i32 q6, q6, r0 -; CHECK-NEXT: vcmp.u32 cs, q7, q6 +; CHECK-NEXT: vcmp.u32 hi, q7, q6 ; CHECK-NEXT: vpsel q7, q4, q5 ; CHECK-NEXT: vmov r1, s28 ; CHECK-NEXT: vmov.16 q0[4], r1