diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 2232e419a619..f0fb03451b2a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -4380,46 +4380,57 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
   SDValue ShOpLo = Op.getOperand(0);
   SDValue ShOpHi = Op.getOperand(1);
   SDValue ShAmt = Op.getOperand(2);
-  SDValue ARMcc;
   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
 
   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
 
   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
                                  DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
-  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+  SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
+
+  // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
+  // is "undef". We wanted 0, so CSEL it directly.
+  SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
+                               ISD::SETEQ, dl, DAG);
+  SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
+  HiBitsForLo =
+      DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
+                  HiBitsForLo, CCVal, Cmp);
+
   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
                                    DAG.getConstant(VTBits, dl, MVT::i64));
-  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
-  SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
-                               ISD::SETGE, dl, DAG);
-  SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
+  SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
+  SDValue LoForNormalShift =
+      DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
 
-  SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
-  SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
-  SDValue Lo =
-      DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
+  Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
+                       dl, DAG);
+  CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
+  SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+  SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
+                           LoForNormalShift, CCVal, Cmp);
 
   // AArch64 shifts larger than the register width are wrapped rather than
   // clamped, so we can't just emit "hi >> x".
-  SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
-  SDValue TrueValHi = Opc == ISD::SRA
-                          ? DAG.getNode(Opc, dl, VT, ShOpHi,
-                                        DAG.getConstant(VTBits - 1, dl,
-                                                        MVT::i64))
-                          : DAG.getConstant(0, dl, VT);
-  SDValue Hi =
-      DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp);
+  SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+  SDValue HiForBigShift =
+      Opc == ISD::SRA
+          ? DAG.getNode(Opc, dl, VT, ShOpHi,
+                        DAG.getConstant(VTBits - 1, dl, MVT::i64))
+          : DAG.getConstant(0, dl, VT);
+  SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
+                           HiForNormalShift, CCVal, Cmp);
 
   SDValue Ops[2] = { Lo, Hi };
   return DAG.getMergeValues(Ops, dl);
 }
+
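For context, a minimal C++ sketch (illustrative names, not part of the LLVM sources) of the value the new SRA/SRL_PARTS lowering computes for a 128-bit right shift split into 64-bit halves. The "& 63" masks stand in for AArch64's mod-64 wrapping of variable shift amounts, and the Amt == 0 select mirrors the new CSEL that sidesteps the undefined "Hi << 64":

#include <cstdint>

struct Parts { uint64_t Lo, Hi; };

// Logical right shift of the 128-bit value Hi:Lo by Amt (0 <= Amt < 128).
static Parts lshr128(uint64_t Lo, uint64_t Hi, unsigned Amt) {
  // Hi bits that drop into the low half when Amt < 64; forced to 0 at Amt == 0
  // because "Hi << 64" would otherwise be undefined (the EQ CSEL above).
  uint64_t HiBitsForLo = (Amt == 0) ? 0 : Hi << ((64 - Amt) & 63);
  uint64_t LoForNormalShift = (Lo >> (Amt & 63)) | HiBitsForLo;
  uint64_t LoForBigShift = Hi >> ((Amt - 64) & 63); // used only when Amt >= 64

  uint64_t HiForNormalShift = Hi >> (Amt & 63);
  uint64_t HiForBigShift = 0; // SRA_PARTS fills with (int64_t)Hi >> 63 instead

  bool BigShift = Amt >= 64; // the GE CSEL on ExtraShAmt = Amt - 64
  return {BigShift ? LoForBigShift : LoForNormalShift,
          BigShift ? HiForBigShift : HiForNormalShift};
}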
 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
 /// i64 values and take a 2 x i64 value to shift plus a shift amount.
 SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
-                                                   SelectionDAG &DAG) const {
+                                                    SelectionDAG &DAG) const {
   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
   EVT VT = Op.getValueType();
   unsigned VTBits = VT.getSizeInBits();
@@ -4427,31 +4438,41 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
   SDValue ShOpLo = Op.getOperand(0);
   SDValue ShOpHi = Op.getOperand(1);
   SDValue ShAmt = Op.getOperand(2);
-  SDValue ARMcc;
 
   assert(Op.getOpcode() == ISD::SHL_PARTS);
   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
                                  DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
-  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+  SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+
+  // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
+  // is "undef". We wanted 0, so CSEL it directly.
+  SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
+                               ISD::SETEQ, dl, DAG);
+  SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
+  LoBitsForHi =
+      DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
+                  LoBitsForHi, CCVal, Cmp);
+
   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
                                    DAG.getConstant(VTBits, dl, MVT::i64));
-  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
-  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+  SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+  SDValue HiForNormalShift =
+      DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
 
-  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+  SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
 
-  SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
-                               ISD::SETGE, dl, DAG);
-  SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
-  SDValue Hi =
-      DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp);
+  Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
+                       dl, DAG);
+  CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
+  SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
+                           HiForNormalShift, CCVal, Cmp);
 
   // AArch64 shifts of larger than register sizes are wrapped rather than
   // clamped, so we can't just emit "lo << a" if a is too big.
-  SDValue TrueValLo = DAG.getConstant(0, dl, VT);
-  SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
-  SDValue Lo =
-      DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
+  SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
+  SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+  SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
+                           LoForNormalShift, CCVal, Cmp);
 
   SDValue Ops[2] = { Lo, Hi };
   return DAG.getMergeValues(Ops, dl);
diff --git a/llvm/test/CodeGen/AArch64/arm64-long-shift.ll b/llvm/test/CodeGen/AArch64/arm64-long-shift.ll
index d5baf16bdd5c..ad89d3ff711b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-long-shift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-long-shift.ll
@@ -2,18 +2,20 @@
 
 define i128 @shl(i128 %r, i128 %s) nounwind readnone {
 ; CHECK-LABEL: shl:
-; CHECK: lsl [[XREG_0:x[0-9]+]], x1, x2
-; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40
-; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2
-; CHECK-NEXT: lsr [[XREG_3:x[0-9]+]], x0, [[XREG_2]]
-; CHECK-NEXT: orr [[XREG_6:x[0-9]+]], [[XREG_3]], [[XREG_0]]
-; CHECK-NEXT: sub [[XREG_4:x[0-9]+]], x2, #64
-; CHECK-NEXT: lsl [[XREG_5:x[0-9]+]], x0, [[XREG_4]]
-; CHECK-NEXT: cmp [[XREG_4]], #0
-; CHECK-NEXT: csel x1, [[XREG_5]], [[XREG_6]], ge
-; CHECK-NEXT: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2
-; CHECK-NEXT: csel x0, xzr, [[SMALLSHIFT_LO]], ge
-; CHECK-NEXT: ret
+; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
+; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
+; CHECK: lsr [[LO_FOR_HI_NORMAL:x[0-9]+]], x0, [[REV_SHIFT]]
+; CHECK: cmp x2, #0
+; CHECK: csel [[LO_FOR_HI:x[0-9]+]], xzr, [[LO_FOR_HI_NORMAL]], eq
+; CHECK: lsl [[HI_FOR_HI:x[0-9]+]], x1, x2
+; CHECK: orr [[HI_NORMAL:x[0-9]+]], [[LO_FOR_HI]], [[HI_FOR_HI]]
+; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
+; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, [[EXTRA_SHIFT]]
+; CHECK: cmp [[EXTRA_SHIFT]], #0
+; CHECK: csel x1, [[HI_BIG_SHIFT]], [[HI_NORMAL]], ge
+; CHECK: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2
+; CHECK: csel x0, xzr, [[SMALLSHIFT_LO]], ge
+; CHECK: ret
 
   %shl = shl i128 %r, %s
   ret i128 %shl
@@ -21,19 +23,21 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone {
 
 define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
 ; CHECK-LABEL: ashr:
-; CHECK: lsr [[XREG_0:x[0-9]+]], x0, x2
-; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40
-; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2
-; CHECK-NEXT: lsl [[XREG_3:x[0-9]+]], x1, [[XREG_2]]
-; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]]
-; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64
-; CHECK-NEXT: asr [[XREG_6:x[0-9]+]], x1, [[XREG_5]]
-; CHECK-NEXT: cmp [[XREG_5]], #0
-; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge
-; CHECK-NEXT: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
-; CHECK-NEXT: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
-; CHECK-NEXT: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge
-; CHECK-NEXT: ret
+; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
+; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
+; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
+; CHECK: cmp x2, #0
+; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
+; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
+; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
+; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
+; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
+; CHECK: cmp [[EXTRA_SHIFT]], #0
+; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
+; CHECK: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
+; CHECK: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
+; CHECK: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge
+; CHECK: ret
 
   %shr = ashr i128 %r, %s
   ret i128 %shr
@@ -41,18 +45,20 @@ define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
 
 define i128 @lshr(i128 %r, i128 %s) nounwind readnone {
 ; CHECK-LABEL: lshr:
-; CHECK: lsr [[XREG_0:x[0-9]+]], x0, x2
-; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40
-; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2
-; CHECK-NEXT: lsl [[XREG_3:x[0-9]+]], x1, [[XREG_2]]
-; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]]
-; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64
-; CHECK-NEXT: lsr [[XREG_6:x[0-9]+]], x1, [[XREG_5]]
-; CHECK-NEXT: cmp [[XREG_5]], #0
-; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge
-; CHECK-NEXT: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
-; CHECK-NEXT: csel x1, xzr, [[SMALLSHIFT_HI]], ge
-; CHECK-NEXT: ret
+; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
+; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
+; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
+; CHECK: cmp x2, #0
+; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
+; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
+; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
+; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
+; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
+; CHECK: cmp [[EXTRA_SHIFT]], #0
+; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
+; CHECK: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
+; CHECK: csel x1, xzr, [[SMALLSHIFT_HI]], ge
+; CHECK: ret
 
   %shr = lshr i128 %r, %s
   ret i128 %shr
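The updated CHECK lines follow the same structure for the SHL_PARTS side: the first "cmp x2, #0" / "csel ..., eq" pair implements the ShAmt == 0 guard, and the second "cmp [[EXTRA_SHIFT]], #0" / "csel ..., ge" pair selects the big-shift path. A mirror-image C++ sketch of that computation (again illustrative only, with "& 63" modelling AArch64's mod-64 wrapping of variable shift amounts):

#include <cstdint>

struct Parts { uint64_t Lo, Hi; };

// Left shift of the 128-bit value Hi:Lo by Amt (0 <= Amt < 128).
static Parts shl128(uint64_t Lo, uint64_t Hi, unsigned Amt) {
  // Lo bits that move up into the high half when Amt < 64; forced to 0 at
  // Amt == 0 because "Lo >> 64" would otherwise be undefined (the EQ CSEL).
  uint64_t LoBitsForHi = (Amt == 0) ? 0 : Lo >> ((64 - Amt) & 63);
  uint64_t HiForNormalShift = (Hi << (Amt & 63)) | LoBitsForHi;
  uint64_t HiForBigShift = Lo << ((Amt - 64) & 63); // used only when Amt >= 64

  bool BigShift = Amt >= 64; // the GE CSEL on ExtraShAmt = Amt - 64
  return {BigShift ? 0 : Lo << (Amt & 63),
          BigShift ? HiForBigShift : HiForNormalShift};
}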