[RISCV] Support stack offset exceed 32-bit for RV64

Differential Revision: https://reviews.llvm.org/D61884

llvm-svn: 371810
This commit is contained in:
Shiva Chen 2019-09-13 04:03:32 +00:00
parent ea530ba3ed
commit a49a16ddd0
6 changed files with 102 additions and 58 deletions

View File

@ -74,7 +74,7 @@ void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB,
.addReg(SrcReg)
.addImm(Val)
.setMIFlag(Flag);
} else if (isInt<32>(Val)) {
} else {
unsigned Opc = RISCV::ADD;
bool isSub = Val < 0;
if (isSub) {
@ -83,13 +83,11 @@ void RISCVFrameLowering::adjustReg(MachineBasicBlock &MBB,
}
Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
TII->movImm32(MBB, MBBI, DL, ScratchReg, Val, Flag);
TII->movImm(MBB, MBBI, DL, ScratchReg, Val, Flag);
BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
.addReg(SrcReg)
.addReg(ScratchReg, RegState::Kill)
.setMIFlag(Flag);
} else {
report_fatal_error("adjustReg cannot yet handle adjustments >32 bits");
}
}

View File

@ -14,6 +14,7 @@
#include "RISCV.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "Utils/RISCVMatInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@ -156,24 +157,43 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(Opcode), DstReg).addFrameIndex(FI).addImm(0);
}
void RISCVInstrInfo::movImm32(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
MachineInstr::MIFlag Flag) const {
assert(isInt<32>(Val) && "Can only materialize 32-bit constants");
void RISCVInstrInfo::movImm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
MachineInstr::MIFlag Flag) const {
MachineFunction *MF = MBB.getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
bool IsRV64 = MF->getSubtarget<RISCVSubtarget>().is64Bit();
Register SrcReg = RISCV::X0;
Register Result = MRI.createVirtualRegister(&RISCV::GPRRegClass);
unsigned Num = 0;
// TODO: If the value can be materialized using only one instruction, only
// insert a single instruction.
if (!IsRV64 && !isInt<32>(Val))
report_fatal_error("Should only materialize 32-bit constants for RV32");
uint64_t Hi20 = ((Val + 0x800) >> 12) & 0xfffff;
uint64_t Lo12 = SignExtend64<12>(Val);
BuildMI(MBB, MBBI, DL, get(RISCV::LUI), DstReg)
.addImm(Hi20)
.setMIFlag(Flag);
BuildMI(MBB, MBBI, DL, get(RISCV::ADDI), DstReg)
.addReg(DstReg, RegState::Kill)
.addImm(Lo12)
.setMIFlag(Flag);
RISCVMatInt::InstSeq Seq;
RISCVMatInt::generateInstSeq(Val, IsRV64, Seq);
assert(Seq.size() > 0);
for (RISCVMatInt::Inst &Inst : Seq) {
// Write the final result to DstReg if it's the last instruction in the Seq.
// Otherwise, write the result to the temp register.
if (++Num == Seq.size())
Result = DstReg;
if (Inst.Opc == RISCV::LUI) {
BuildMI(MBB, MBBI, DL, get(RISCV::LUI), Result)
.addImm(Inst.Imm)
.setMIFlag(Flag);
} else {
BuildMI(MBB, MBBI, DL, get(Inst.Opc), Result)
.addReg(SrcReg, RegState::Kill)
.addImm(Inst.Imm)
.setMIFlag(Flag);
}
// Only the first instruction has X0 as its source.
SrcReg = Result;
}
}
// The contents of values added to Cond are not examined outside of

View File

@ -46,10 +46,10 @@ public:
int FrameIndex, const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
// Materializes the given int32 Val into DstReg.
void movImm32(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
// Materializes the given integer Val into DstReg.
void movImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, Register DstReg, uint64_t Val,
MachineInstr::MIFlag Flag = MachineInstr::NoFlags) const;
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;

View File

@ -110,7 +110,7 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// The offset won't fit in an immediate, so use a scratch register instead
// Modify Offset and FrameReg appropriately
Register ScratchReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
TII->movImm32(MBB, II, DL, ScratchReg, Offset);
TII->movImm(MBB, II, DL, ScratchReg, Offset);
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), ScratchReg)
.addReg(FrameReg)
.addReg(ScratchReg, RegState::Kill);

View File

@ -0,0 +1,38 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s | FileCheck %s
;
; The test case check that RV64 could handle the stack adjustment offset exceed
; 32-bit.
define void @foo() nounwind {
; CHECK-LABEL: foo:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lui a0, 95
; CHECK-NEXT: addiw a0, a0, 1505
; CHECK-NEXT: slli a0, a0, 13
; CHECK-NEXT: addi a0, a0, 32
; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: lui a0, 781250
; CHECK-NEXT: addiw a0, a0, 24
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: sd ra, 0(a0)
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: call baz
; CHECK-NEXT: lui a0, 781250
; CHECK-NEXT: addiw a0, a0, 24
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: ld ra, 0(a0)
; CHECK-NEXT: lui a0, 95
; CHECK-NEXT: addiw a0, a0, 1505
; CHECK-NEXT: slli a0, a0, 13
; CHECK-NEXT: addi a0, a0, 32
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ret
entry:
%w = alloca [100000000 x { fp128, fp128 }], align 16
%arraydecay = getelementptr inbounds [100000000 x { fp128, fp128 }], [100000000 x { fp128, fp128 }]* %w, i64 0, i64 0
call void @baz({ fp128, fp128 }* nonnull %arraydecay)
ret void
}
declare void @baz({ fp128, fp128 }*)

View File

@ -348,38 +348,38 @@ define void @caller1024() nounwind {
; RV64I-LABEL: caller1024:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addi a0, a0, -1024
; RV64I-NEXT: addiw a0, a0, -1024
; RV64I-NEXT: sub sp, sp, a0
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addi a0, a0, -1032
; RV64I-NEXT: addiw a0, a0, -1032
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: sd ra, 0(a0)
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addi a0, a0, -1040
; RV64I-NEXT: addiw a0, a0, -1040
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: sd s0, 0(a0)
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addi a0, a0, -1024
; RV64I-NEXT: addiw a0, a0, -1024
; RV64I-NEXT: add s0, sp, a0
; RV64I-NEXT: andi sp, sp, -1024
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addi a0, a0, -2048
; RV64I-NEXT: addiw a0, a0, -2048
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: mv a0, a0
; RV64I-NEXT: call callee
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addi a0, a0, -1024
; RV64I-NEXT: addiw a0, a0, -1024
; RV64I-NEXT: sub sp, s0, a0
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addi a0, a0, -1040
; RV64I-NEXT: addiw a0, a0, -1040
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: ld s0, 0(a0)
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addi a0, a0, -1032
; RV64I-NEXT: addiw a0, a0, -1032
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: ld ra, 0(a0)
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addi a0, a0, -1024
; RV64I-NEXT: addiw a0, a0, -1024
; RV64I-NEXT: add sp, sp, a0
; RV64I-NEXT: ret
%1 = alloca i8, align 1024
@ -431,7 +431,6 @@ define void @caller2048() nounwind {
; RV32I-NEXT: add s0, sp, a0
; RV32I-NEXT: andi sp, sp, -2048
; RV32I-NEXT: lui a0, 1
; RV32I-NEXT: mv a0, a0
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: mv a0, a0
; RV32I-NEXT: call callee
@ -454,38 +453,37 @@ define void @caller2048() nounwind {
; RV64I-LABEL: caller2048:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a0, 2
; RV64I-NEXT: addi a0, a0, -2048
; RV64I-NEXT: addiw a0, a0, -2048
; RV64I-NEXT: sub sp, sp, a0
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addi a0, a0, 2040
; RV64I-NEXT: addiw a0, a0, 2040
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: sd ra, 0(a0)
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addi a0, a0, 2032
; RV64I-NEXT: addiw a0, a0, 2032
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: sd s0, 0(a0)
; RV64I-NEXT: lui a0, 2
; RV64I-NEXT: addi a0, a0, -2048
; RV64I-NEXT: addiw a0, a0, -2048
; RV64I-NEXT: add s0, sp, a0
; RV64I-NEXT: andi sp, sp, -2048
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: mv a0, a0
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: mv a0, a0
; RV64I-NEXT: call callee
; RV64I-NEXT: lui a0, 2
; RV64I-NEXT: addi a0, a0, -2048
; RV64I-NEXT: addiw a0, a0, -2048
; RV64I-NEXT: sub sp, s0, a0
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addi a0, a0, 2032
; RV64I-NEXT: addiw a0, a0, 2032
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: ld s0, 0(a0)
; RV64I-NEXT: lui a0, 1
; RV64I-NEXT: addi a0, a0, 2040
; RV64I-NEXT: addiw a0, a0, 2040
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: ld ra, 0(a0)
; RV64I-NEXT: lui a0, 2
; RV64I-NEXT: addi a0, a0, -2048
; RV64I-NEXT: addiw a0, a0, -2048
; RV64I-NEXT: add sp, sp, a0
; RV64I-NEXT: ret
%1 = alloca i8, align 2048
@ -522,7 +520,6 @@ define void @caller4096() nounwind {
; RV32I-LABEL: caller4096:
; RV32I: # %bb.0:
; RV32I-NEXT: lui a0, 3
; RV32I-NEXT: mv a0, a0
; RV32I-NEXT: sub sp, sp, a0
; RV32I-NEXT: lui a0, 3
; RV32I-NEXT: addi a0, a0, -4
@ -533,17 +530,14 @@ define void @caller4096() nounwind {
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: sw s0, 0(a0)
; RV32I-NEXT: lui a0, 3
; RV32I-NEXT: mv a0, a0
; RV32I-NEXT: add s0, sp, a0
; RV32I-NEXT: srli a0, sp, 12
; RV32I-NEXT: slli sp, a0, 12
; RV32I-NEXT: lui a0, 2
; RV32I-NEXT: mv a0, a0
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: mv a0, a0
; RV32I-NEXT: call callee
; RV32I-NEXT: lui a0, 3
; RV32I-NEXT: mv a0, a0
; RV32I-NEXT: sub sp, s0, a0
; RV32I-NEXT: lui a0, 3
; RV32I-NEXT: addi a0, a0, -8
@ -554,46 +548,40 @@ define void @caller4096() nounwind {
; RV32I-NEXT: add a0, sp, a0
; RV32I-NEXT: lw ra, 0(a0)
; RV32I-NEXT: lui a0, 3
; RV32I-NEXT: mv a0, a0
; RV32I-NEXT: add sp, sp, a0
; RV32I-NEXT: ret
;
; RV64I-LABEL: caller4096:
; RV64I: # %bb.0:
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: mv a0, a0
; RV64I-NEXT: sub sp, sp, a0
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: addi a0, a0, -8
; RV64I-NEXT: addiw a0, a0, -8
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: sd ra, 0(a0)
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: addi a0, a0, -16
; RV64I-NEXT: addiw a0, a0, -16
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: sd s0, 0(a0)
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: mv a0, a0
; RV64I-NEXT: add s0, sp, a0
; RV64I-NEXT: srli a0, sp, 12
; RV64I-NEXT: slli sp, a0, 12
; RV64I-NEXT: lui a0, 2
; RV64I-NEXT: mv a0, a0
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: mv a0, a0
; RV64I-NEXT: call callee
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: mv a0, a0
; RV64I-NEXT: sub sp, s0, a0
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: addi a0, a0, -16
; RV64I-NEXT: addiw a0, a0, -16
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: ld s0, 0(a0)
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: addi a0, a0, -8
; RV64I-NEXT: addiw a0, a0, -8
; RV64I-NEXT: add a0, sp, a0
; RV64I-NEXT: ld ra, 0(a0)
; RV64I-NEXT: lui a0, 3
; RV64I-NEXT: mv a0, a0
; RV64I-NEXT: add sp, sp, a0
; RV64I-NEXT: ret
%1 = alloca i8, align 4096