From 43a1756a5d533bbf29bc39f49f04fc76f5c30ce9 Mon Sep 17 00:00:00 2001
From: Annika
Date: Sat, 12 Feb 2022 06:11:27 +0800
Subject: [PATCH] m68k: Support bit shifts on 64-bit integers

As per https://bugs.llvm.org/show_bug.cgi?id=52119.

Reviewed By: myhsu

Differential Revision: https://reviews.llvm.org/D111497
---
 llvm/lib/Target/M68k/M68kISelLowering.cpp | 105 +++++++++++++++++++
 llvm/lib/Target/M68k/M68kISelLowering.h   |   2 +
 llvm/lib/Target/M68k/M68kRegisterInfo.h   |   8 ++
 llvm/test/CodeGen/M68k/Arith/bitwise.ll   | 120 ++++++++++++++++++++++
 4 files changed, 235 insertions(+)

diff --git a/llvm/lib/Target/M68k/M68kISelLowering.cpp b/llvm/lib/Target/M68k/M68kISelLowering.cpp
index dba190a2ebc0..a190057840cd 100644
--- a/llvm/lib/Target/M68k/M68kISelLowering.cpp
+++ b/llvm/lib/Target/M68k/M68kISelLowering.cpp
@@ -101,6 +101,9 @@ M68kTargetLowering::M68kTargetLowering(const M68kTargetMachine &TM,
     setOperationAction(OP, MVT::i32, Expand);
   }
 
+  for (auto OP : {ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS})
+    setOperationAction(OP, MVT::i32, Custom);
+
   // Add/Sub overflow ops with MVT::Glues are lowered to CCR dependences.
   for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
     setOperationAction(ISD::ADDC, VT, Custom);
@@ -1354,6 +1357,12 @@ SDValue M68kTargetLowering::LowerOperation(SDValue Op,
     return LowerVASTART(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC:
     return LowerDYNAMIC_STACKALLOC(Op, DAG);
+  case ISD::SHL_PARTS:
+    return LowerShiftLeftParts(Op, DAG);
+  case ISD::SRA_PARTS:
+    return LowerShiftRightParts(Op, DAG, true);
+  case ISD::SRL_PARTS:
+    return LowerShiftRightParts(Op, DAG, false);
   }
 }
 
@@ -3239,6 +3248,102 @@ SDValue M68kTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   return DAG.getMergeValues(Ops, DL);
 }
 
+SDValue M68kTargetLowering::LowerShiftLeftParts(SDValue Op,
+                                                SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Lo = Op.getOperand(0);
+  SDValue Hi = Op.getOperand(1);
+  SDValue Shamt = Op.getOperand(2);
+  EVT VT = Lo.getValueType();
+
+  // if Shamt - register size < 0: // i.e. Shamt < register size
+  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u ((register size - 1) ^ Shamt))
+  //   Lo = Lo << Shamt
+  // else:
+  //   Hi = Lo << (Shamt - register size)
+  //   Lo = 0
+
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue One = DAG.getConstant(1, DL, VT);
+  SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT);
+  SDValue RegisterSizeMinus1 = DAG.getConstant(32 - 1, DL, VT);
+  SDValue ShamtMinusRegisterSize =
+      DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize);
+  SDValue RegisterSizeMinus1Shamt =
+      DAG.getNode(ISD::XOR, DL, VT, RegisterSizeMinus1, Shamt);
+
+  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
+  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
+  SDValue ShiftRightLo =
+      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, RegisterSizeMinus1Shamt);
+  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
+  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
+  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusRegisterSize);
+
+  SDValue CC =
+      DAG.getSetCC(DL, MVT::i8, ShamtMinusRegisterSize, Zero, ISD::SETLT);
+
+  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
+  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+  return DAG.getMergeValues({Lo, Hi}, DL);
+}
+
+SDValue M68kTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
+                                                 bool IsSRA) const {
+  SDLoc DL(Op);
+  SDValue Lo = Op.getOperand(0);
+  SDValue Hi = Op.getOperand(1);
+  SDValue Shamt = Op.getOperand(2);
+  EVT VT = Lo.getValueType();
+
+  // SRA expansion (IsSRA is true):
+  //   if Shamt - register size < 0: // i.e. Shamt < register size
+  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << ((register size - 1) ^ Shamt))
+  //     Hi = Hi >>s Shamt
+  //   else:
+  //     Lo = Hi >>s (Shamt - register size)
+  //     Hi = Hi >>s (register size - 1)
+  //
+  // SRL expansion (IsSRA is false):
+  //   if Shamt - register size < 0: // i.e. Shamt < register size
+  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << ((register size - 1) ^ Shamt))
+  //     Hi = Hi >>u Shamt
+  //   else:
+  //     Lo = Hi >>u (Shamt - register size)
+  //     Hi = 0
+
+  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;
+
+  SDValue Zero = DAG.getConstant(0, DL, VT);
+  SDValue One = DAG.getConstant(1, DL, VT);
+  SDValue MinusRegisterSize = DAG.getConstant(-32, DL, VT);
+  SDValue RegisterSizeMinus1 = DAG.getConstant(32 - 1, DL, VT);
+  SDValue ShamtMinusRegisterSize =
+      DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusRegisterSize);
+  SDValue RegisterSizeMinus1Shamt =
+      DAG.getNode(ISD::XOR, DL, VT, RegisterSizeMinus1, Shamt);
+
+  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
+  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
+  SDValue ShiftLeftHi =
+      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, RegisterSizeMinus1Shamt);
+  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
+  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
+  SDValue LoFalse =
+      DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusRegisterSize);
+  SDValue HiFalse =
+      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, RegisterSizeMinus1) : Zero;
+
+  SDValue CC =
+      DAG.getSetCC(DL, MVT::i8, ShamtMinusRegisterSize, Zero, ISD::SETLT);
+
+  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
+  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);
+
+  return DAG.getMergeValues({Lo, Hi}, DL);
+}
+
 //===----------------------------------------------------------------------===//
 // DAG Combine
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/M68k/M68kISelLowering.h b/llvm/lib/Target/M68k/M68kISelLowering.h
index 9375a99962eb..f759a7d939c8 100644
--- a/llvm/lib/Target/M68k/M68kISelLowering.h
+++ b/llvm/lib/Target/M68k/M68kISelLowering.h
@@ -220,6 +220,8 @@ private:
   SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
 
   SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                           CallingConv::ID CallConv, bool IsVarArg,
diff --git a/llvm/lib/Target/M68k/M68kRegisterInfo.h b/llvm/lib/Target/M68k/M68kRegisterInfo.h
index 7f822e1cb34f..fc55e19a958b 100644
--- a/llvm/lib/Target/M68k/M68kRegisterInfo.h
+++ b/llvm/lib/Target/M68k/M68kRegisterInfo.h
@@ -97,6 +97,14 @@ public:
   bool canRealignStack(const MachineFunction &MF) const override;
 
   Register getFrameRegister(const MachineFunction &MF) const override;
+
+  const TargetRegisterClass *
+  getCrossCopyRegClass(const TargetRegisterClass *RC) const override {
+    if (RC == &M68k::CCRCRegClass)
+      return &M68k::DR32RegClass;
+    return RC;
+  }
+
   unsigned getStackRegister() const { return StackPtr; }
   unsigned getBaseRegister() const { return BasePtr; }
   unsigned getGlobalBaseRegister() const { return GlobalBasePtr; }
diff --git a/llvm/test/CodeGen/M68k/Arith/bitwise.ll b/llvm/test/CodeGen/M68k/Arith/bitwise.ll
index d5fe191ab174..fa8ea1370f86 100644
--- a/llvm/test/CodeGen/M68k/Arith/bitwise.ll
+++ b/llvm/test/CodeGen/M68k/Arith/bitwise.ll
@@ -230,3 +230,123 @@ define i32 @eoril(i32 %a) nounwind {
   %1 = xor i32 %a, 305419896
   ret i32 %1
 }
+
+define i64 @lshr64(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: lshr64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    suba.l #12, %sp
+; CHECK-NEXT:    movem.l %d2-%d4, (0,%sp) ; 16-byte Folded Spill
+; CHECK-NEXT:    move.l (28,%sp), %d3
+; CHECK-NEXT:    move.l (16,%sp), %d2
+; CHECK-NEXT:    move.l %d3, %d1
+; CHECK-NEXT:    add.l #-32, %d1
+; CHECK-NEXT:    bmi .LBB18_1
+; CHECK-NEXT:  ; %bb.2:
+; CHECK-NEXT:    move.l #0, %d0
+; CHECK-NEXT:    bra .LBB18_3
+; CHECK-NEXT:  .LBB18_1:
+; CHECK-NEXT:    move.l %d2, %d0
+; CHECK-NEXT:    lsr.l %d3, %d0
+; CHECK-NEXT:  .LBB18_3:
+; CHECK-NEXT:    move.l %d3, %d4
+; CHECK-NEXT:    add.l #-32, %d4
+; CHECK-NEXT:    bmi .LBB18_4
+; CHECK-NEXT:  ; %bb.5:
+; CHECK-NEXT:    lsr.l %d1, %d2
+; CHECK-NEXT:    move.l %d2, %d1
+; CHECK-NEXT:    bra .LBB18_6
+; CHECK-NEXT:  .LBB18_4:
+; CHECK-NEXT:    move.l %d3, %d4
+; CHECK-NEXT:    eori.l #31, %d4
+; CHECK-NEXT:    lsl.l #1, %d2
+; CHECK-NEXT:    move.l (20,%sp), %d1
+; CHECK-NEXT:    lsl.l %d4, %d2
+; CHECK-NEXT:    lsr.l %d3, %d1
+; CHECK-NEXT:    or.l %d2, %d1
+; CHECK-NEXT:  .LBB18_6:
+; CHECK-NEXT:    movem.l (0,%sp), %d2-%d4 ; 16-byte Folded Reload
+; CHECK-NEXT:    adda.l #12, %sp
+; CHECK-NEXT:    rts
+  %1 = lshr i64 %a, %b
+  ret i64 %1
+}
+
+define i64 @ashr64(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: ashr64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    suba.l #8, %sp
+; CHECK-NEXT:    movem.l %d2-%d3, (0,%sp) ; 12-byte Folded Spill
+; CHECK-NEXT:    move.l (24,%sp), %d2
+; CHECK-NEXT:    move.l (12,%sp), %d0
+; CHECK-NEXT:    move.l %d2, %d3
+; CHECK-NEXT:    add.l #-32, %d3
+; CHECK-NEXT:    move.l %d2, %d1
+; CHECK-NEXT:    add.l #-32, %d1
+; CHECK-NEXT:    bmi .LBB19_1
+; CHECK-NEXT:  ; %bb.2:
+; CHECK-NEXT:    move.l %d0, %d1
+; CHECK-NEXT:    asr.l %d3, %d1
+; CHECK-NEXT:    bra .LBB19_3
+; CHECK-NEXT:  .LBB19_1:
+; CHECK-NEXT:    move.l %d2, %d1
+; CHECK-NEXT:    eori.l #31, %d1
+; CHECK-NEXT:    move.l %d0, %d3
+; CHECK-NEXT:    lsl.l #1, %d3
+; CHECK-NEXT:    lsl.l %d1, %d3
+; CHECK-NEXT:    move.l (16,%sp), %d1
+; CHECK-NEXT:    lsr.l %d2, %d1
+; CHECK-NEXT:    or.l %d3, %d1
+; CHECK-NEXT:  .LBB19_3:
+; CHECK-NEXT:    move.l %d2, %d3
+; CHECK-NEXT:    add.l #-32, %d3
+; CHECK-NEXT:    bmi .LBB19_5
+; CHECK-NEXT:  ; %bb.4:
+; CHECK-NEXT:    move.l #31, %d2
+; CHECK-NEXT:  .LBB19_5:
+; CHECK-NEXT:    asr.l %d2, %d0
+; CHECK-NEXT:    movem.l (0,%sp), %d2-%d3 ; 12-byte Folded Reload
+; CHECK-NEXT:    adda.l #8, %sp
+; CHECK-NEXT:    rts
+  %1 = ashr i64 %a, %b
+  ret i64 %1
+}
+
+define i64 @shl64(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: shl64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    suba.l #12, %sp
+; CHECK-NEXT:    movem.l %d2-%d4, (0,%sp) ; 16-byte Folded Spill
+; CHECK-NEXT:    move.l (28,%sp), %d3
+; CHECK-NEXT:    move.l (20,%sp), %d2
+; CHECK-NEXT:    move.l %d3, %d0
+; CHECK-NEXT:    add.l #-32, %d0
+; CHECK-NEXT:    bmi .LBB20_1
+; CHECK-NEXT:  ; %bb.2:
+; CHECK-NEXT:    move.l #0, %d1
+; CHECK-NEXT:    bra .LBB20_3
+; CHECK-NEXT:  .LBB20_1:
+; CHECK-NEXT:    move.l %d2, %d1
+; CHECK-NEXT:    lsl.l %d3, %d1
+; CHECK-NEXT:  .LBB20_3:
+; CHECK-NEXT:    move.l %d3, %d4
+; CHECK-NEXT:    add.l #-32, %d4
+; CHECK-NEXT:    bmi .LBB20_4
+; CHECK-NEXT:  ; %bb.5:
+; CHECK-NEXT:    lsl.l %d0, %d2
+; CHECK-NEXT:    move.l %d2, %d0
+; CHECK-NEXT:    bra .LBB20_6
+; CHECK-NEXT:  .LBB20_4:
+; CHECK-NEXT:    move.l %d3, %d4
+; CHECK-NEXT:    eori.l #31, %d4
+; CHECK-NEXT:    lsr.l #1, %d2
+; CHECK-NEXT:    move.l (16,%sp), %d0
+; CHECK-NEXT:    lsr.l %d4, %d2
+; CHECK-NEXT:    lsl.l %d3, %d0
+; CHECK-NEXT:    or.l %d2, %d0
+; CHECK-NEXT:  .LBB20_6:
+; CHECK-NEXT:    movem.l (0,%sp), %d2-%d4 ; 16-byte Folded Reload
+; CHECK-NEXT:    adda.l #12, %sp
+; CHECK-NEXT:    rts
+  %1 = shl i64 %a, %b
+  ret i64 %1
+}