forked from OSchip/llvm-project
[RISCV] Custom lower SHL_PARTS, SRA_PARTS, SRL_PARTS
When not optimizing for minimum size (-Oz) we custom lower wide shifts (SHL_PARTS, SRA_PARTS, SRL_PARTS) instead of expanding to a libcall. Differential Revision: https://reviews.llvm.org/D59477 llvm-svn: 358498
This commit is contained in:
parent
4d88b17b3f
commit
20d2424016
|
@ -125,9 +125,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
|
|||
setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
|
||||
setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);
|
||||
|
||||
setOperationAction(ISD::SHL_PARTS, XLenVT, Expand);
|
||||
setOperationAction(ISD::SRL_PARTS, XLenVT, Expand);
|
||||
setOperationAction(ISD::SRA_PARTS, XLenVT, Expand);
|
||||
setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
|
||||
setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
|
||||
setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);
|
||||
|
||||
setOperationAction(ISD::ROTL, XLenVT, Expand);
|
||||
setOperationAction(ISD::ROTR, XLenVT, Expand);
|
||||
|
@ -360,6 +360,12 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
|
|||
return lowerFRAMEADDR(Op, DAG);
|
||||
case ISD::RETURNADDR:
|
||||
return lowerRETURNADDR(Op, DAG);
|
||||
case ISD::SHL_PARTS:
|
||||
return lowerShiftLeftParts(Op, DAG);
|
||||
case ISD::SRA_PARTS:
|
||||
return lowerShiftRightParts(Op, DAG, true);
|
||||
case ISD::SRL_PARTS:
|
||||
return lowerShiftRightParts(Op, DAG, false);
|
||||
case ISD::BITCAST: {
|
||||
assert(Subtarget.is64Bit() && Subtarget.hasStdExtF() &&
|
||||
"Unexpected custom legalisation");
|
||||
|
@ -568,6 +574,97 @@ SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
|
|||
return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
|
||||
}
|
||||
|
||||
SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // Custom lowering for SHL_PARTS. Operand 0 is the low half, operand 1 the
  // high half and operand 2 the shift amount of a double-width value; the two
  // shifted halves are returned as merged values in {Lo, Hi} order.
  //
  // Both candidate results are materialised and picked with SELECT nodes:
  //
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = Lo << Shamt
  //     Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  //   else:
  //     Lo = 0
  //     Hi = Lo << (Shamt-XLEN)
  const SDLoc DL(Op);
  SDValue InLo = Op.getOperand(0);
  SDValue InHi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT VT = InLo.getValueType();
  const unsigned XLen = Subtarget.getXLen();

  // Constants and derived shift amounts shared by both cases.
  SDValue ConstZero = DAG.getConstant(0, DL, VT);
  SDValue ConstOne = DAG.getConstant(1, DL, VT);
  SDValue NegXLen = DAG.getConstant(-static_cast<int>(XLen), DL, VT);
  SDValue XLenLess1 = DAG.getConstant(XLen - 1, DL, VT);
  SDValue AmtLessXLen = DAG.getNode(ISD::ADD, DL, VT, Amt, NegXLen);
  SDValue XLenLess1LessAmt = DAG.getNode(ISD::SUB, DL, VT, XLenLess1, Amt);

  // Shamt < XLEN: bits leaving the low half are carried into the high half.
  // The carry is shifted right by 1 and then by XLEN-1-Shamt rather than by
  // XLEN-Shamt in one go, so each individual shift amount stays below XLEN
  // even when Shamt is 0.
  SDValue LoNarrow = DAG.getNode(ISD::SHL, DL, VT, InLo, Amt);
  SDValue LoDownByOne = DAG.getNode(ISD::SRL, DL, VT, InLo, ConstOne);
  SDValue CarryIntoHi =
      DAG.getNode(ISD::SRL, DL, VT, LoDownByOne, XLenLess1LessAmt);
  SDValue HiShifted = DAG.getNode(ISD::SHL, DL, VT, InHi, Amt);
  SDValue HiNarrow = DAG.getNode(ISD::OR, DL, VT, HiShifted, CarryIntoHi);

  // Shamt >= XLEN: the high half comes entirely from the low half.
  SDValue HiWide = DAG.getNode(ISD::SHL, DL, VT, InLo, AmtLessXLen);

  SDValue IsNarrow = DAG.getSetCC(DL, VT, AmtLessXLen, ConstZero, ISD::SETLT);

  SDValue OutLo =
      DAG.getNode(ISD::SELECT, DL, VT, IsNarrow, LoNarrow, ConstZero);
  SDValue OutHi = DAG.getNode(ISD::SELECT, DL, VT, IsNarrow, HiNarrow, HiWide);

  return DAG.getMergeValues({OutLo, OutHi}, DL);
}
|
||||
|
||||
SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  // Custom lowering for SRA_PARTS (IsSRA == true) and SRL_PARTS
  // (IsSRA == false). Operand 0 is the low half, operand 1 the high half and
  // operand 2 the shift amount of a double-width value; the two shifted halves
  // are returned as merged values in {Lo, Hi} order.
  //
  // Both candidate results are materialised and picked with SELECT nodes.
  //
  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0;
  const SDLoc DL(Op);
  SDValue InLo = Op.getOperand(0);
  SDValue InHi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT VT = InLo.getValueType();
  const unsigned XLen = Subtarget.getXLen();

  // Only shifts applied to the high half differ between the two flavours.
  const unsigned HiShiftOpc = IsSRA ? ISD::SRA : ISD::SRL;

  // Constants and derived shift amounts shared by both cases.
  SDValue ConstZero = DAG.getConstant(0, DL, VT);
  SDValue ConstOne = DAG.getConstant(1, DL, VT);
  SDValue NegXLen = DAG.getConstant(-static_cast<int>(XLen), DL, VT);
  SDValue XLenLess1 = DAG.getConstant(XLen - 1, DL, VT);
  SDValue AmtLessXLen = DAG.getNode(ISD::ADD, DL, VT, Amt, NegXLen);
  SDValue XLenLess1LessAmt = DAG.getNode(ISD::SUB, DL, VT, XLenLess1, Amt);

  // Shamt < XLEN: bits leaving the high half are carried into the low half.
  // The carry is shifted left by 1 and then by XLEN-1-Shamt rather than by
  // XLEN-Shamt in one go, so each individual shift amount stays below XLEN
  // even when Shamt is 0.
  SDValue LoShifted = DAG.getNode(ISD::SRL, DL, VT, InLo, Amt);
  SDValue HiUpByOne = DAG.getNode(ISD::SHL, DL, VT, InHi, ConstOne);
  SDValue CarryIntoLo =
      DAG.getNode(ISD::SHL, DL, VT, HiUpByOne, XLenLess1LessAmt);
  SDValue LoNarrow = DAG.getNode(ISD::OR, DL, VT, LoShifted, CarryIntoLo);
  SDValue HiNarrow = DAG.getNode(HiShiftOpc, DL, VT, InHi, Amt);

  // Shamt >= XLEN: the low half comes entirely from the high half, and the
  // high half is filled with sign bits (SRA) or zero (SRL).
  SDValue LoWide = DAG.getNode(HiShiftOpc, DL, VT, InHi, AmtLessXLen);
  SDValue HiWide = ConstZero;
  if (IsSRA)
    HiWide = DAG.getNode(ISD::SRA, DL, VT, InHi, XLenLess1);

  SDValue IsNarrow = DAG.getSetCC(DL, VT, AmtLessXLen, ConstZero, ISD::SETLT);

  SDValue OutLo = DAG.getNode(ISD::SELECT, DL, VT, IsNarrow, LoNarrow, LoWide);
  SDValue OutHi = DAG.getNode(ISD::SELECT, DL, VT, IsNarrow, HiNarrow, HiWide);

  return DAG.getMergeValues({OutLo, OutHi}, DL);
}
|
||||
|
||||
// Returns the opcode of the target-specific SDNode that implements the 32-bit
|
||||
// form of the given Opcode.
|
||||
static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
|
||||
|
|
|
@ -114,6 +114,12 @@ public:
|
|||
return ISD::SIGN_EXTEND;
|
||||
}
|
||||
|
||||
bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override {
  // Answer "expand" for every function except those for which
  // Function::hasMinSize() reports true.
  return !DAG.getMachineFunction().getFunction().hasMinSize();
}
|
||||
|
||||
private:
|
||||
void analyzeInputArgs(MachineFunction &MF, CCState &CCInfo,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
|
@ -152,6 +158,8 @@ private:
|
|||
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerShiftRightParts(SDValue Op, SelectionDAG &DAG, bool IsSRA) const;
|
||||
|
||||
bool isEligibleForTailCallOptimization(
|
||||
CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
|
||||
|
|
|
@ -211,11 +211,20 @@ define i64 @sll(i64 %a, i64 %b) nounwind {
|
|||
;
|
||||
; RV32I-LABEL: sll:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: addi sp, sp, -16
|
||||
; RV32I-NEXT: sw ra, 12(sp)
|
||||
; RV32I-NEXT: call __ashldi3
|
||||
; RV32I-NEXT: lw ra, 12(sp)
|
||||
; RV32I-NEXT: addi sp, sp, 16
|
||||
; RV32I-NEXT: addi a3, a2, -32
|
||||
; RV32I-NEXT: bltz a3, .LBB11_2
|
||||
; RV32I-NEXT: # %bb.1:
|
||||
; RV32I-NEXT: sll a1, a0, a3
|
||||
; RV32I-NEXT: mv a0, zero
|
||||
; RV32I-NEXT: ret
|
||||
; RV32I-NEXT: .LBB11_2:
|
||||
; RV32I-NEXT: addi a3, zero, 31
|
||||
; RV32I-NEXT: sub a3, a3, a2
|
||||
; RV32I-NEXT: srli a4, a0, 1
|
||||
; RV32I-NEXT: srl a3, a4, a3
|
||||
; RV32I-NEXT: sll a1, a1, a2
|
||||
; RV32I-NEXT: or a1, a1, a3
|
||||
; RV32I-NEXT: sll a0, a0, a2
|
||||
; RV32I-NEXT: ret
|
||||
%1 = shl i64 %a, %b
|
||||
ret i64 %1
|
||||
|
@ -288,11 +297,20 @@ define i64 @srl(i64 %a, i64 %b) nounwind {
|
|||
;
|
||||
; RV32I-LABEL: srl:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: addi sp, sp, -16
|
||||
; RV32I-NEXT: sw ra, 12(sp)
|
||||
; RV32I-NEXT: call __lshrdi3
|
||||
; RV32I-NEXT: lw ra, 12(sp)
|
||||
; RV32I-NEXT: addi sp, sp, 16
|
||||
; RV32I-NEXT: addi a3, a2, -32
|
||||
; RV32I-NEXT: bltz a3, .LBB15_2
|
||||
; RV32I-NEXT: # %bb.1:
|
||||
; RV32I-NEXT: srl a0, a1, a3
|
||||
; RV32I-NEXT: mv a1, zero
|
||||
; RV32I-NEXT: ret
|
||||
; RV32I-NEXT: .LBB15_2:
|
||||
; RV32I-NEXT: addi a3, zero, 31
|
||||
; RV32I-NEXT: sub a3, a3, a2
|
||||
; RV32I-NEXT: slli a4, a1, 1
|
||||
; RV32I-NEXT: sll a3, a4, a3
|
||||
; RV32I-NEXT: srl a0, a0, a2
|
||||
; RV32I-NEXT: or a0, a0, a3
|
||||
; RV32I-NEXT: srl a1, a1, a2
|
||||
; RV32I-NEXT: ret
|
||||
%1 = lshr i64 %a, %b
|
||||
ret i64 %1
|
||||
|
@ -306,11 +324,20 @@ define i64 @sra(i64 %a, i64 %b) nounwind {
|
|||
;
|
||||
; RV32I-LABEL: sra:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: addi sp, sp, -16
|
||||
; RV32I-NEXT: sw ra, 12(sp)
|
||||
; RV32I-NEXT: call __ashrdi3
|
||||
; RV32I-NEXT: lw ra, 12(sp)
|
||||
; RV32I-NEXT: addi sp, sp, 16
|
||||
; RV32I-NEXT: addi a3, a2, -32
|
||||
; RV32I-NEXT: bltz a3, .LBB16_2
|
||||
; RV32I-NEXT: # %bb.1:
|
||||
; RV32I-NEXT: sra a0, a1, a3
|
||||
; RV32I-NEXT: srai a1, a1, 31
|
||||
; RV32I-NEXT: ret
|
||||
; RV32I-NEXT: .LBB16_2:
|
||||
; RV32I-NEXT: addi a3, zero, 31
|
||||
; RV32I-NEXT: sub a3, a3, a2
|
||||
; RV32I-NEXT: slli a4, a1, 1
|
||||
; RV32I-NEXT: sll a3, a4, a3
|
||||
; RV32I-NEXT: srl a0, a0, a2
|
||||
; RV32I-NEXT: or a0, a0, a3
|
||||
; RV32I-NEXT: sra a1, a1, a2
|
||||
; RV32I-NEXT: ret
|
||||
%1 = ashr i64 %a, %b
|
||||
ret i64 %1
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck %s -check-prefix=RV32I
|
||||
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
|
||||
; RUN: | FileCheck %s -check-prefix=RV64I
|
||||
|
||||
; Basic shift support is tested as part of ALU.ll. This file ensures that
|
||||
; shifts which may not be supported natively are lowered properly.
|
||||
|
@ -8,12 +10,44 @@
|
|||
define i64 @lshr64(i64 %a, i64 %b) nounwind {
|
||||
; RV32I-LABEL: lshr64:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: addi a3, a2, -32
|
||||
; RV32I-NEXT: bltz a3, .LBB0_2
|
||||
; RV32I-NEXT: # %bb.1:
|
||||
; RV32I-NEXT: srl a0, a1, a3
|
||||
; RV32I-NEXT: mv a1, zero
|
||||
; RV32I-NEXT: ret
|
||||
; RV32I-NEXT: .LBB0_2:
|
||||
; RV32I-NEXT: addi a3, zero, 31
|
||||
; RV32I-NEXT: sub a3, a3, a2
|
||||
; RV32I-NEXT: slli a4, a1, 1
|
||||
; RV32I-NEXT: sll a3, a4, a3
|
||||
; RV32I-NEXT: srl a0, a0, a2
|
||||
; RV32I-NEXT: or a0, a0, a3
|
||||
; RV32I-NEXT: srl a1, a1, a2
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: lshr64:
|
||||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: srl a0, a0, a1
|
||||
; RV64I-NEXT: ret
|
||||
%1 = lshr i64 %a, %b
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i64 @lshr64_minsize(i64 %a, i64 %b) minsize nounwind {
|
||||
; RV32I-LABEL: lshr64_minsize:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: addi sp, sp, -16
|
||||
; RV32I-NEXT: sw ra, 12(sp)
|
||||
; RV32I-NEXT: call __lshrdi3
|
||||
; RV32I-NEXT: lw ra, 12(sp)
|
||||
; RV32I-NEXT: addi sp, sp, 16
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: lshr64_minsize:
|
||||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: srl a0, a0, a1
|
||||
; RV64I-NEXT: ret
|
||||
%1 = lshr i64 %a, %b
|
||||
ret i64 %1
|
||||
}
|
||||
|
@ -21,12 +55,44 @@ define i64 @lshr64(i64 %a, i64 %b) nounwind {
|
|||
define i64 @ashr64(i64 %a, i64 %b) nounwind {
|
||||
; RV32I-LABEL: ashr64:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: addi a3, a2, -32
|
||||
; RV32I-NEXT: bltz a3, .LBB2_2
|
||||
; RV32I-NEXT: # %bb.1:
|
||||
; RV32I-NEXT: sra a0, a1, a3
|
||||
; RV32I-NEXT: srai a1, a1, 31
|
||||
; RV32I-NEXT: ret
|
||||
; RV32I-NEXT: .LBB2_2:
|
||||
; RV32I-NEXT: addi a3, zero, 31
|
||||
; RV32I-NEXT: sub a3, a3, a2
|
||||
; RV32I-NEXT: slli a4, a1, 1
|
||||
; RV32I-NEXT: sll a3, a4, a3
|
||||
; RV32I-NEXT: srl a0, a0, a2
|
||||
; RV32I-NEXT: or a0, a0, a3
|
||||
; RV32I-NEXT: sra a1, a1, a2
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: ashr64:
|
||||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: sra a0, a0, a1
|
||||
; RV64I-NEXT: ret
|
||||
%1 = ashr i64 %a, %b
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i64 @ashr64_minsize(i64 %a, i64 %b) minsize nounwind {
|
||||
; RV32I-LABEL: ashr64_minsize:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: addi sp, sp, -16
|
||||
; RV32I-NEXT: sw ra, 12(sp)
|
||||
; RV32I-NEXT: call __ashrdi3
|
||||
; RV32I-NEXT: lw ra, 12(sp)
|
||||
; RV32I-NEXT: addi sp, sp, 16
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: ashr64_minsize:
|
||||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: sra a0, a0, a1
|
||||
; RV64I-NEXT: ret
|
||||
%1 = ashr i64 %a, %b
|
||||
ret i64 %1
|
||||
}
|
||||
|
@ -34,12 +100,203 @@ define i64 @ashr64(i64 %a, i64 %b) nounwind {
|
|||
define i64 @shl64(i64 %a, i64 %b) nounwind {
|
||||
; RV32I-LABEL: shl64:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: addi a3, a2, -32
|
||||
; RV32I-NEXT: bltz a3, .LBB4_2
|
||||
; RV32I-NEXT: # %bb.1:
|
||||
; RV32I-NEXT: sll a1, a0, a3
|
||||
; RV32I-NEXT: mv a0, zero
|
||||
; RV32I-NEXT: ret
|
||||
; RV32I-NEXT: .LBB4_2:
|
||||
; RV32I-NEXT: addi a3, zero, 31
|
||||
; RV32I-NEXT: sub a3, a3, a2
|
||||
; RV32I-NEXT: srli a4, a0, 1
|
||||
; RV32I-NEXT: srl a3, a4, a3
|
||||
; RV32I-NEXT: sll a1, a1, a2
|
||||
; RV32I-NEXT: or a1, a1, a3
|
||||
; RV32I-NEXT: sll a0, a0, a2
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: shl64:
|
||||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: sll a0, a0, a1
|
||||
; RV64I-NEXT: ret
|
||||
%1 = shl i64 %a, %b
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i64 @shl64_minsize(i64 %a, i64 %b) minsize nounwind {
|
||||
; RV32I-LABEL: shl64_minsize:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: addi sp, sp, -16
|
||||
; RV32I-NEXT: sw ra, 12(sp)
|
||||
; RV32I-NEXT: call __ashldi3
|
||||
; RV32I-NEXT: lw ra, 12(sp)
|
||||
; RV32I-NEXT: addi sp, sp, 16
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: shl64_minsize:
|
||||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: sll a0, a0, a1
|
||||
; RV64I-NEXT: ret
|
||||
%1 = shl i64 %a, %b
|
||||
ret i64 %1
|
||||
}
|
||||
|
||||
define i128 @lshr128(i128 %a, i128 %b) nounwind {
|
||||
; RV32I-LABEL: lshr128:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: addi sp, sp, -48
|
||||
; RV32I-NEXT: sw ra, 44(sp)
|
||||
; RV32I-NEXT: sw s0, 40(sp)
|
||||
; RV32I-NEXT: mv s0, a0
|
||||
; RV32I-NEXT: lw a0, 12(a1)
|
||||
; RV32I-NEXT: sw a0, 20(sp)
|
||||
; RV32I-NEXT: lw a0, 8(a1)
|
||||
; RV32I-NEXT: sw a0, 16(sp)
|
||||
; RV32I-NEXT: lw a0, 4(a1)
|
||||
; RV32I-NEXT: sw a0, 12(sp)
|
||||
; RV32I-NEXT: lw a0, 0(a1)
|
||||
; RV32I-NEXT: sw a0, 8(sp)
|
||||
; RV32I-NEXT: lw a2, 0(a2)
|
||||
; RV32I-NEXT: addi a0, sp, 24
|
||||
; RV32I-NEXT: addi a1, sp, 8
|
||||
; RV32I-NEXT: call __lshrti3
|
||||
; RV32I-NEXT: lw a0, 36(sp)
|
||||
; RV32I-NEXT: sw a0, 12(s0)
|
||||
; RV32I-NEXT: lw a0, 32(sp)
|
||||
; RV32I-NEXT: sw a0, 8(s0)
|
||||
; RV32I-NEXT: lw a0, 28(sp)
|
||||
; RV32I-NEXT: sw a0, 4(s0)
|
||||
; RV32I-NEXT: lw a0, 24(sp)
|
||||
; RV32I-NEXT: sw a0, 0(s0)
|
||||
; RV32I-NEXT: lw s0, 40(sp)
|
||||
; RV32I-NEXT: lw ra, 44(sp)
|
||||
; RV32I-NEXT: addi sp, sp, 48
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: lshr128:
|
||||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: addi a3, a2, -64
|
||||
; RV64I-NEXT: bltz a3, .LBB6_2
|
||||
; RV64I-NEXT: # %bb.1:
|
||||
; RV64I-NEXT: srl a0, a1, a3
|
||||
; RV64I-NEXT: mv a1, zero
|
||||
; RV64I-NEXT: ret
|
||||
; RV64I-NEXT: .LBB6_2:
|
||||
; RV64I-NEXT: addi a3, zero, 63
|
||||
; RV64I-NEXT: sub a3, a3, a2
|
||||
; RV64I-NEXT: slli a4, a1, 1
|
||||
; RV64I-NEXT: sll a3, a4, a3
|
||||
; RV64I-NEXT: srl a0, a0, a2
|
||||
; RV64I-NEXT: or a0, a0, a3
|
||||
; RV64I-NEXT: srl a1, a1, a2
|
||||
; RV64I-NEXT: ret
|
||||
%1 = lshr i128 %a, %b
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @ashr128(i128 %a, i128 %b) nounwind {
|
||||
; RV32I-LABEL: ashr128:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: addi sp, sp, -48
|
||||
; RV32I-NEXT: sw ra, 44(sp)
|
||||
; RV32I-NEXT: sw s0, 40(sp)
|
||||
; RV32I-NEXT: mv s0, a0
|
||||
; RV32I-NEXT: lw a0, 12(a1)
|
||||
; RV32I-NEXT: sw a0, 20(sp)
|
||||
; RV32I-NEXT: lw a0, 8(a1)
|
||||
; RV32I-NEXT: sw a0, 16(sp)
|
||||
; RV32I-NEXT: lw a0, 4(a1)
|
||||
; RV32I-NEXT: sw a0, 12(sp)
|
||||
; RV32I-NEXT: lw a0, 0(a1)
|
||||
; RV32I-NEXT: sw a0, 8(sp)
|
||||
; RV32I-NEXT: lw a2, 0(a2)
|
||||
; RV32I-NEXT: addi a0, sp, 24
|
||||
; RV32I-NEXT: addi a1, sp, 8
|
||||
; RV32I-NEXT: call __ashrti3
|
||||
; RV32I-NEXT: lw a0, 36(sp)
|
||||
; RV32I-NEXT: sw a0, 12(s0)
|
||||
; RV32I-NEXT: lw a0, 32(sp)
|
||||
; RV32I-NEXT: sw a0, 8(s0)
|
||||
; RV32I-NEXT: lw a0, 28(sp)
|
||||
; RV32I-NEXT: sw a0, 4(s0)
|
||||
; RV32I-NEXT: lw a0, 24(sp)
|
||||
; RV32I-NEXT: sw a0, 0(s0)
|
||||
; RV32I-NEXT: lw s0, 40(sp)
|
||||
; RV32I-NEXT: lw ra, 44(sp)
|
||||
; RV32I-NEXT: addi sp, sp, 48
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: ashr128:
|
||||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: addi a3, a2, -64
|
||||
; RV64I-NEXT: bltz a3, .LBB7_2
|
||||
; RV64I-NEXT: # %bb.1:
|
||||
; RV64I-NEXT: sra a0, a1, a3
|
||||
; RV64I-NEXT: srai a1, a1, 63
|
||||
; RV64I-NEXT: ret
|
||||
; RV64I-NEXT: .LBB7_2:
|
||||
; RV64I-NEXT: addi a3, zero, 63
|
||||
; RV64I-NEXT: sub a3, a3, a2
|
||||
; RV64I-NEXT: slli a4, a1, 1
|
||||
; RV64I-NEXT: sll a3, a4, a3
|
||||
; RV64I-NEXT: srl a0, a0, a2
|
||||
; RV64I-NEXT: or a0, a0, a3
|
||||
; RV64I-NEXT: sra a1, a1, a2
|
||||
; RV64I-NEXT: ret
|
||||
%1 = ashr i128 %a, %b
|
||||
ret i128 %1
|
||||
}
|
||||
|
||||
define i128 @shl128(i128 %a, i128 %b) nounwind {
|
||||
; RV32I-LABEL: shl128:
|
||||
; RV32I: # %bb.0:
|
||||
; RV32I-NEXT: addi sp, sp, -48
|
||||
; RV32I-NEXT: sw ra, 44(sp)
|
||||
; RV32I-NEXT: sw s0, 40(sp)
|
||||
; RV32I-NEXT: mv s0, a0
|
||||
; RV32I-NEXT: lw a0, 12(a1)
|
||||
; RV32I-NEXT: sw a0, 20(sp)
|
||||
; RV32I-NEXT: lw a0, 8(a1)
|
||||
; RV32I-NEXT: sw a0, 16(sp)
|
||||
; RV32I-NEXT: lw a0, 4(a1)
|
||||
; RV32I-NEXT: sw a0, 12(sp)
|
||||
; RV32I-NEXT: lw a0, 0(a1)
|
||||
; RV32I-NEXT: sw a0, 8(sp)
|
||||
; RV32I-NEXT: lw a2, 0(a2)
|
||||
; RV32I-NEXT: addi a0, sp, 24
|
||||
; RV32I-NEXT: addi a1, sp, 8
|
||||
; RV32I-NEXT: call __ashlti3
|
||||
; RV32I-NEXT: lw a0, 36(sp)
|
||||
; RV32I-NEXT: sw a0, 12(s0)
|
||||
; RV32I-NEXT: lw a0, 32(sp)
|
||||
; RV32I-NEXT: sw a0, 8(s0)
|
||||
; RV32I-NEXT: lw a0, 28(sp)
|
||||
; RV32I-NEXT: sw a0, 4(s0)
|
||||
; RV32I-NEXT: lw a0, 24(sp)
|
||||
; RV32I-NEXT: sw a0, 0(s0)
|
||||
; RV32I-NEXT: lw s0, 40(sp)
|
||||
; RV32I-NEXT: lw ra, 44(sp)
|
||||
; RV32I-NEXT: addi sp, sp, 48
|
||||
; RV32I-NEXT: ret
|
||||
;
|
||||
; RV64I-LABEL: shl128:
|
||||
; RV64I: # %bb.0:
|
||||
; RV64I-NEXT: addi a3, a2, -64
|
||||
; RV64I-NEXT: bltz a3, .LBB8_2
|
||||
; RV64I-NEXT: # %bb.1:
|
||||
; RV64I-NEXT: sll a1, a0, a3
|
||||
; RV64I-NEXT: mv a0, zero
|
||||
; RV64I-NEXT: ret
|
||||
; RV64I-NEXT: .LBB8_2:
|
||||
; RV64I-NEXT: addi a3, zero, 63
|
||||
; RV64I-NEXT: sub a3, a3, a2
|
||||
; RV64I-NEXT: srli a4, a0, 1
|
||||
; RV64I-NEXT: srl a3, a4, a3
|
||||
; RV64I-NEXT: sll a1, a1, a2
|
||||
; RV64I-NEXT: or a1, a1, a3
|
||||
; RV64I-NEXT: sll a0, a0, a2
|
||||
; RV64I-NEXT: ret
|
||||
%1 = shl i128 %a, %b
|
||||
ret i128 %1
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue