From 397f9d9d05fc9451b274e3126891236cfd72f639 Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Wed, 16 Nov 2016 20:54:28 +0000 Subject: [PATCH] ARM: fix CodeGen for 64-bit shifts. One half of the shifts obviously needed conditional selection based on whether the shift amount is more than 32-bits, but leaving the other half as the natural shift isn't acceptable either: it's undefined behaviour to shift a 32-bit value by more than 31. llvm-svn: 287149 --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 46 ++++++++++++------- llvm/test/CodeGen/ARM/shift-i64.ll | 59 +++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 16 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/shift-i64.ll diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 143c6d9c5430..20406ef350ec 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -4893,6 +4893,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); SDValue ARMcc; + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); @@ -4903,15 +4904,23 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, DAG.getConstant(VTBits, dl, MVT::i32)); SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); + SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); + SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), + ISD::SETGE, ARMcc, DAG, dl); + SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift, + ARMcc, CCR, CmpLo); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), - ISD::SETGE, ARMcc, DAG, dl); - SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); - SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, - CCR, Cmp); + + SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); + SDValue HiBigShift = Opc == ISD::SRA + ? DAG.getNode(Opc, dl, VT, ShOpHi, + DAG.getConstant(VTBits - 1, dl, VT)) + : DAG.getConstant(0, dl, VT); + SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), + ISD::SETGE, ARMcc, DAG, dl); + SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, + ARMcc, CCR, CmpHi); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); @@ -4929,23 +4938,28 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); SDValue ARMcc; + SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); assert(Op.getOpcode() == ISD::SHL_PARTS); SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, DAG.getConstant(VTBits, dl, MVT::i32), ShAmt); SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); + SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); + SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, DAG.getConstant(VTBits, dl, MVT::i32)); - SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); - SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); + SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); + SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), + ISD::SETGE, ARMcc, DAG, dl); + SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift, + ARMcc, CCR, CmpHi); - SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); - SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), + SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32), ISD::SETGE, ARMcc, DAG, dl); - SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); - SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, - CCR, Cmp); + SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, + DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo); SDValue Ops[2] = { Lo, Hi }; return DAG.getMergeValues(Ops, dl); diff --git a/llvm/test/CodeGen/ARM/shift-i64.ll b/llvm/test/CodeGen/ARM/shift-i64.ll new file mode 100644 index 000000000000..12cc5fbe03e4 --- /dev/null +++ b/llvm/test/CodeGen/ARM/shift-i64.ll @@ -0,0 +1,59 @@ +; RUN: llc -mtriple=arm-eabi %s -o - | FileCheck %s + +define i64 @test_shl(i64 %val, i64 %amt) { +; CHECK-LABEL: test_shl: + ; First calculate the hi part when the shift amount is small enough that it + ; contains components from both halves. It'll be returned in r1 so that's a + ; reasonable place for it to end up. +; CHECK: rsb [[REVERSE_SHIFT:.*]], r2, #32 +; CHECK: lsr [[TMP:.*]], r0, [[REVERSE_SHIFT]] +; CHECK: orr r1, [[TMP]], r1, lsl r2 + + ; Check whether the shift was in fact small (< 32 bits). +; CHECK: sub [[EXTRA_SHIFT:.*]], r2, #32 +; CHECK: cmp [[EXTRA_SHIFT]], #0 + + ; If not, the high part of the answer is just the low part shifted by the + ; excess. +; CHECK: lslge r1, r0, [[EXTRA_SHIFT]] + + ; The low part is either a direct shift (1st inst) or 0. We can reuse the same + ; NZCV. +; CHECK: lsl r0, r0, r2 +; CHECK: movge r0, #0 + + %res = shl i64 %val, %amt + ret i64 %res +} + +; Explanation for lshr is pretty much the reverse of shl. +define i64 @test_lshr(i64 %val, i64 %amt) { +; CHECK-LABEL: test_lshr: +; CHECK: lsr r0, r0, r2 +; CHECK: rsb [[REVERSE_SHIFT:.*]], r2, #32 +; CHECK: orr r0, r0, r1, lsl [[REVERSE_SHIFT]] +; CHECK: sub [[EXTRA_SHIFT:.*]], r2, #32 +; CHECK: cmp [[EXTRA_SHIFT]], #0 +; CHECK: lsrge r0, r1, [[EXTRA_SHIFT]] +; CHECK: lsr r1, r1, r2 +; CHECK: movge r1, #0 + %res = lshr i64 %val, %amt + ret i64 %res +} + +; One minor difference for ashr: the high bits must be "hi >> 31" if the shift +; amount is large to get the right sign bit. +define i64 @test_ashr(i64 %val, i64 %amt) { +; CHECK-LABEL: test_ashr: +; CHECK: sub [[EXTRA_SHIFT:.*]], r2, #32 +; CHECK: asr [[HI_TMP:.*]], r1, r2 +; CHECK: lsr r0, r0, r2 +; CHECK: rsb [[REVERSE_SHIFT:.*]], r2, #32 +; CHECK: cmp [[EXTRA_SHIFT]], #0 +; CHECK: orr r0, r0, r1, lsl [[REVERSE_SHIFT]] +; CHECK: asrge [[HI_TMP]], r1, #31 +; CHECK: asrge r0, r1, [[EXTRA_SHIFT]] +; CHECK: mov r1, [[HI_TMP]] + %res = ashr i64 %val, %amt + ret i64 %res +}