forked from OSchip/llvm-project
AArch64: fix 128-bit shifts
We mustn't introduce a shift of exactly 64 bits for any inputs, since that's an UNDEF value (and worse, it's not what you want with the natural AArch64 implementation). The generated code is pretty horrific, but I couldn't come up with an obviously better alternative (if the amount is constant EXTR could help). Turns out 128-bit shifts are just nasty. rdar://22491037 llvm-svn: 254475
This commit is contained in:
parent
af714765e6
commit
f3be9d5c0b
|
@ -4380,42 +4380,53 @@ SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
|
|||
SDValue ShOpLo = Op.getOperand(0);
|
||||
SDValue ShOpHi = Op.getOperand(1);
|
||||
SDValue ShAmt = Op.getOperand(2);
|
||||
SDValue ARMcc;
|
||||
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
|
||||
|
||||
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
|
||||
|
||||
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
|
||||
DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
|
||||
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
|
||||
SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
|
||||
|
||||
// Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
|
||||
// is "undef". We wanted 0, so CSEL it directly.
|
||||
SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
|
||||
ISD::SETEQ, dl, DAG);
|
||||
SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
|
||||
HiBitsForLo =
|
||||
DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
|
||||
HiBitsForLo, CCVal, Cmp);
|
||||
|
||||
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
|
||||
DAG.getConstant(VTBits, dl, MVT::i64));
|
||||
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
|
||||
|
||||
SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
|
||||
ISD::SETGE, dl, DAG);
|
||||
SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
|
||||
SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
|
||||
SDValue LoForNormalShift =
|
||||
DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
|
||||
|
||||
SDValue FalseValLo = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
|
||||
SDValue TrueValLo = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
|
||||
SDValue Lo =
|
||||
DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
|
||||
Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
|
||||
dl, DAG);
|
||||
CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
|
||||
SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
|
||||
SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
|
||||
LoForNormalShift, CCVal, Cmp);
|
||||
|
||||
// AArch64 shifts larger than the register width are wrapped rather than
|
||||
// clamped, so we can't just emit "hi >> x".
|
||||
SDValue FalseValHi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
|
||||
SDValue TrueValHi = Opc == ISD::SRA
|
||||
SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
|
||||
SDValue HiForBigShift =
|
||||
Opc == ISD::SRA
|
||||
? DAG.getNode(Opc, dl, VT, ShOpHi,
|
||||
DAG.getConstant(VTBits - 1, dl,
|
||||
MVT::i64))
|
||||
DAG.getConstant(VTBits - 1, dl, MVT::i64))
|
||||
: DAG.getConstant(0, dl, VT);
|
||||
SDValue Hi =
|
||||
DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValHi, FalseValHi, CCVal, Cmp);
|
||||
SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
|
||||
HiForNormalShift, CCVal, Cmp);
|
||||
|
||||
SDValue Ops[2] = { Lo, Hi };
|
||||
return DAG.getMergeValues(Ops, dl);
|
||||
}
|
||||
|
||||
|
||||
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
|
||||
/// i64 values and take a 2 x i64 value to shift plus a shift amount.
|
||||
SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
|
||||
|
@ -4427,31 +4438,41 @@ SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
|
|||
SDValue ShOpLo = Op.getOperand(0);
|
||||
SDValue ShOpHi = Op.getOperand(1);
|
||||
SDValue ShAmt = Op.getOperand(2);
|
||||
SDValue ARMcc;
|
||||
|
||||
assert(Op.getOpcode() == ISD::SHL_PARTS);
|
||||
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
|
||||
DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
|
||||
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
|
||||
SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
|
||||
|
||||
// Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
|
||||
// is "undef". We wanted 0, so CSEL it directly.
|
||||
SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
|
||||
ISD::SETEQ, dl, DAG);
|
||||
SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
|
||||
LoBitsForHi =
|
||||
DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
|
||||
LoBitsForHi, CCVal, Cmp);
|
||||
|
||||
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
|
||||
DAG.getConstant(VTBits, dl, MVT::i64));
|
||||
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
|
||||
SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
|
||||
SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
|
||||
SDValue HiForNormalShift =
|
||||
DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
|
||||
|
||||
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
|
||||
SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
|
||||
|
||||
SDValue Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64),
|
||||
ISD::SETGE, dl, DAG);
|
||||
SDValue CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
|
||||
SDValue Hi =
|
||||
DAG.getNode(AArch64ISD::CSEL, dl, VT, Tmp3, FalseVal, CCVal, Cmp);
|
||||
Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
|
||||
dl, DAG);
|
||||
CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
|
||||
SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
|
||||
HiForNormalShift, CCVal, Cmp);
|
||||
|
||||
// AArch64 shifts of larger than register sizes are wrapped rather than
|
||||
// clamped, so we can't just emit "lo << a" if a is too big.
|
||||
SDValue TrueValLo = DAG.getConstant(0, dl, VT);
|
||||
SDValue FalseValLo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
|
||||
SDValue Lo =
|
||||
DAG.getNode(AArch64ISD::CSEL, dl, VT, TrueValLo, FalseValLo, CCVal, Cmp);
|
||||
SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
|
||||
SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
|
||||
SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
|
||||
LoForNormalShift, CCVal, Cmp);
|
||||
|
||||
SDValue Ops[2] = { Lo, Hi };
|
||||
return DAG.getMergeValues(Ops, dl);
|
||||
|
|
|
@ -2,18 +2,20 @@
|
|||
|
||||
define i128 @shl(i128 %r, i128 %s) nounwind readnone {
|
||||
; CHECK-LABEL: shl:
|
||||
; CHECK: lsl [[XREG_0:x[0-9]+]], x1, x2
|
||||
; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40
|
||||
; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2
|
||||
; CHECK-NEXT: lsr [[XREG_3:x[0-9]+]], x0, [[XREG_2]]
|
||||
; CHECK-NEXT: orr [[XREG_6:x[0-9]+]], [[XREG_3]], [[XREG_0]]
|
||||
; CHECK-NEXT: sub [[XREG_4:x[0-9]+]], x2, #64
|
||||
; CHECK-NEXT: lsl [[XREG_5:x[0-9]+]], x0, [[XREG_4]]
|
||||
; CHECK-NEXT: cmp [[XREG_4]], #0
|
||||
; CHECK-NEXT: csel x1, [[XREG_5]], [[XREG_6]], ge
|
||||
; CHECK-NEXT: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2
|
||||
; CHECK-NEXT: csel x0, xzr, [[SMALLSHIFT_LO]], ge
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
|
||||
; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
|
||||
; CHECK: lsr [[LO_FOR_HI_NORMAL:x[0-9]+]], x0, [[REV_SHIFT]]
|
||||
; CHECK: cmp x2, #0
|
||||
; CHECK: csel [[LO_FOR_HI:x[0-9]+]], xzr, [[LO_FOR_HI_NORMAL]], eq
|
||||
; CHECK: lsl [[HI_FOR_HI:x[0-9]+]], x1, x2
|
||||
; CHECK: orr [[HI_NORMAL:x[0-9]+]], [[LO_FOR_HI]], [[HI_FOR_HI]]
|
||||
; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
|
||||
; CHECK: lsl [[HI_BIG_SHIFT:x[0-9]+]], x0, [[EXTRA_SHIFT]]
|
||||
; CHECK: cmp [[EXTRA_SHIFT]], #0
|
||||
; CHECK: csel x1, [[HI_BIG_SHIFT]], [[HI_NORMAL]], ge
|
||||
; CHECK: lsl [[SMALLSHIFT_LO:x[0-9]+]], x0, x2
|
||||
; CHECK: csel x0, xzr, [[SMALLSHIFT_LO]], ge
|
||||
; CHECK: ret
|
||||
|
||||
%shl = shl i128 %r, %s
|
||||
ret i128 %shl
|
||||
|
@ -21,19 +23,21 @@ define i128 @shl(i128 %r, i128 %s) nounwind readnone {
|
|||
|
||||
define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
|
||||
; CHECK-LABEL: ashr:
|
||||
; CHECK: lsr [[XREG_0:x[0-9]+]], x0, x2
|
||||
; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40
|
||||
; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2
|
||||
; CHECK-NEXT: lsl [[XREG_3:x[0-9]+]], x1, [[XREG_2]]
|
||||
; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]]
|
||||
; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64
|
||||
; CHECK-NEXT: asr [[XREG_6:x[0-9]+]], x1, [[XREG_5]]
|
||||
; CHECK-NEXT: cmp [[XREG_5]], #0
|
||||
; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge
|
||||
; CHECK-NEXT: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
|
||||
; CHECK-NEXT: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
|
||||
; CHECK-NEXT: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
|
||||
; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
|
||||
; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
|
||||
; CHECK: cmp x2, #0
|
||||
; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
|
||||
; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
|
||||
; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
|
||||
; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
|
||||
; CHECK: asr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
|
||||
; CHECK: cmp [[EXTRA_SHIFT]], #0
|
||||
; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
|
||||
; CHECK: asr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
|
||||
; CHECK: asr [[BIGSHIFT_HI:x[0-9]+]], x1, #63
|
||||
; CHECK: csel x1, [[BIGSHIFT_HI]], [[SMALLSHIFT_HI]], ge
|
||||
; CHECK: ret
|
||||
|
||||
%shr = ashr i128 %r, %s
|
||||
ret i128 %shr
|
||||
|
@ -41,18 +45,20 @@ define i128 @ashr(i128 %r, i128 %s) nounwind readnone {
|
|||
|
||||
define i128 @lshr(i128 %r, i128 %s) nounwind readnone {
|
||||
; CHECK-LABEL: lshr:
|
||||
; CHECK: lsr [[XREG_0:x[0-9]+]], x0, x2
|
||||
; CHECK-NEXT: orr w[[XREG_1:[0-9]+]], wzr, #0x40
|
||||
; CHECK-NEXT: sub [[XREG_2:x[0-9]+]], x[[XREG_1]], x2
|
||||
; CHECK-NEXT: lsl [[XREG_3:x[0-9]+]], x1, [[XREG_2]]
|
||||
; CHECK-NEXT: orr [[XREG_4:x[0-9]+]], [[XREG_0]], [[XREG_3]]
|
||||
; CHECK-NEXT: sub [[XREG_5:x[0-9]+]], x2, #64
|
||||
; CHECK-NEXT: lsr [[XREG_6:x[0-9]+]], x1, [[XREG_5]]
|
||||
; CHECK-NEXT: cmp [[XREG_5]], #0
|
||||
; CHECK-NEXT: csel x0, [[XREG_6]], [[XREG_4]], ge
|
||||
; CHECK-NEXT: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
|
||||
; CHECK-NEXT: csel x1, xzr, [[SMALLSHIFT_HI]], ge
|
||||
; CHECK-NEXT: ret
|
||||
; CHECK: orr w[[SIXTY_FOUR:[0-9]+]], wzr, #0x40
|
||||
; CHECK: sub [[REV_SHIFT:x[0-9]+]], x[[SIXTY_FOUR]], x2
|
||||
; CHECK: lsl [[HI_FOR_LO_NORMAL:x[0-9]+]], x1, [[REV_SHIFT]]
|
||||
; CHECK: cmp x2, #0
|
||||
; CHECK: csel [[HI_FOR_LO:x[0-9]+]], xzr, [[HI_FOR_LO_NORMAL]], eq
|
||||
; CHECK: lsr [[LO_FOR_LO:x[0-9]+]], x0, x2
|
||||
; CHECK: orr [[LO_NORMAL:x[0-9]+]], [[LO_FOR_LO]], [[HI_FOR_LO]]
|
||||
; CHECK: sub [[EXTRA_SHIFT:x[0-9]+]], x2, #64
|
||||
; CHECK: lsr [[LO_BIG_SHIFT:x[0-9]+]], x1, [[EXTRA_SHIFT]]
|
||||
; CHECK: cmp [[EXTRA_SHIFT]], #0
|
||||
; CHECK: csel x0, [[LO_BIG_SHIFT]], [[LO_NORMAL]], ge
|
||||
; CHECK: lsr [[SMALLSHIFT_HI:x[0-9]+]], x1, x2
|
||||
; CHECK: csel x1, xzr, [[SMALLSHIFT_HI]], ge
|
||||
; CHECK: ret
|
||||
|
||||
%shr = lshr i128 %r, %s
|
||||
ret i128 %shr
|
||||
|
|
Loading…
Reference in New Issue