[RISCV] Prevent use of t0 (aka x5) as rs1 for jalr instructions.

Some microarchitectures treat rs1=x1/x5 on jalr as a hint to pop
the return-address stack. We should avoid using x5 on jalr
instructions since we aren't using x5 as an alternate link register.

Differential Revision: https://reviews.llvm.org/D105875
This commit is contained in:
Craig Topper 2021-07-12 23:53:40 -07:00
parent 2c47b8847e
commit 46e8970817
4 changed files with 81 additions and 8 deletions

View File

@ -1002,12 +1002,12 @@ def PseudoBR : Pseudo<(outs), (ins simm21_lsb0_jal:$imm20), [(br bb:$imm20)]>,
PseudoInstExpansion<(JAL X0, simm21_lsb0_jal:$imm20)>;
let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in
def PseudoBRIND : Pseudo<(outs), (ins GPR:$rs1, simm12:$imm12), []>,
def PseudoBRIND : Pseudo<(outs), (ins GPRJALR:$rs1, simm12:$imm12), []>,
PseudoInstExpansion<(JALR X0, GPR:$rs1, simm12:$imm12)>;
def : Pat<(brind GPR:$rs1), (PseudoBRIND GPR:$rs1, 0)>;
def : Pat<(brind (add GPR:$rs1, simm12:$imm12)),
(PseudoBRIND GPR:$rs1, simm12:$imm12)>;
def : Pat<(brind GPRJALR:$rs1), (PseudoBRIND GPRJALR:$rs1, 0)>;
def : Pat<(brind (add GPRJALR:$rs1, simm12:$imm12)),
(PseudoBRIND GPRJALR:$rs1, simm12:$imm12)>;
// PseudoCALLReg is a generic pseudo instruction for calls which will eventually
// expand to auipc and jalr while encoding, with any given register used as the
@ -1039,8 +1039,8 @@ def : Pat<(riscv_sret_flag), (SRET X0, X0)>;
def : Pat<(riscv_mret_flag), (MRET X0, X0)>;
let isCall = 1, Defs = [X1] in
def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rs1),
[(riscv_call GPR:$rs1)]>,
def PseudoCALLIndirect : Pseudo<(outs), (ins GPRJALR:$rs1),
[(riscv_call GPRJALR:$rs1)]>,
PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>;
let isBarrier = 1, isReturn = 1, isTerminator = 1 in

View File

@ -162,6 +162,20 @@ def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (add
let RegInfos = XLenRI;
}
// Register class for the rs1 operand of the JALR-based pseudos in this change
// (PseudoBRIND and PseudoCALLIndirect).
// Don't use X1 or X5 for JALR since that is a hint to pop the return address
// stack on some microarchitectures. Also remove the reserved registers X0, X2,
// X3, and X4 as it reduces the number of register classes that get synthesized
// by tablegen.
// The sequences below together cover exactly X6 through X31.
def GPRJALR : RegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 10, 17),
(sequence "X%u", 6, 7),
(sequence "X%u", 28, 31),
(sequence "X%u", 8, 9),
(sequence "X%u", 18, 27)
)> {
let RegInfos = XLenRI;
}
def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 10, 15),
(sequence "X%u", 8, 9)
@ -171,9 +185,10 @@ def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add
// For indirect tail calls, we can't use callee-saved registers, as they are
// restored to the saved value before the tail call, which would clobber a call
// address.
// address. We shouldn't use x5 since that is a hint to pop the return
// address stack on some microarchitectures.
def GPRTC : RegisterClass<"RISCV", [XLenVT], 32, (add
(sequence "X%u", 5, 7),
(sequence "X%u", 6, 7),
(sequence "X%u", 10, 17),
(sequence "X%u", 28, 31)
)> {

View File

@ -114,6 +114,46 @@ define i32 @test_call_indirect(i32 (i32)* %a, i32 %b) nounwind {
ret i32 %1
}
; Make sure we don't use t0 as the source for jalr as that is a hint to pop the
; return address stack on some microarchitectures.
; The callee takes seven i32 arguments, so the expected code shifts %b..%h down
; from a1-a7 into a0-a6, and the function pointer that arrives in a0 is first
; copied to a temporary register. The checks below expect that temporary to be
; t1 for both the RV32I and RV32I-PIC prefixes.
define i32 @test_call_indirect_no_t0(i32 (i32, i32, i32, i32, i32, i32, i32)* %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) nounwind {
; RV32I-LABEL: test_call_indirect_no_t0:
; RV32I: # %bb.0:
; RV32I-NEXT: addi sp, sp, -16
; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT: mv t1, a0
; RV32I-NEXT: mv a0, a1
; RV32I-NEXT: mv a1, a2
; RV32I-NEXT: mv a2, a3
; RV32I-NEXT: mv a3, a4
; RV32I-NEXT: mv a4, a5
; RV32I-NEXT: mv a5, a6
; RV32I-NEXT: mv a6, a7
; RV32I-NEXT: jalr t1
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
; RV32I-PIC-LABEL: test_call_indirect_no_t0:
; RV32I-PIC: # %bb.0:
; RV32I-PIC-NEXT: addi sp, sp, -16
; RV32I-PIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-PIC-NEXT: mv t1, a0
; RV32I-PIC-NEXT: mv a0, a1
; RV32I-PIC-NEXT: mv a1, a2
; RV32I-PIC-NEXT: mv a2, a3
; RV32I-PIC-NEXT: mv a3, a4
; RV32I-PIC-NEXT: mv a4, a5
; RV32I-PIC-NEXT: mv a5, a6
; RV32I-PIC-NEXT: mv a6, a7
; RV32I-PIC-NEXT: jalr t1
; RV32I-PIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-PIC-NEXT: addi sp, sp, 16
; RV32I-PIC-NEXT: ret
%1 = call i32 %a(i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
ret i32 %1
}
; Ensure that calls to fastcc functions aren't rejected. Such calls may be
; introduced when compiling with optimisation.

View File

@ -58,6 +58,24 @@ entry:
ret void
}
; Make sure we don't use t0 as the source for jr as that is a hint to pop the
; return address stack on some microarchitectures.
; The indirect tail call forwards seven i32 arguments, so the expected code
; shifts them down from a1-a7 into a0-a6, and the function pointer that arrives
; in a0 is first copied to a temporary register. The checks below expect that
; temporary to be t1.
define i32 @caller_indirect_no_t0(i32 (i32, i32, i32, i32, i32, i32, i32)* %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
; CHECK-LABEL: caller_indirect_no_t0:
; CHECK: # %bb.0:
; CHECK-NEXT: mv t1, a0
; CHECK-NEXT: mv a0, a1
; CHECK-NEXT: mv a1, a2
; CHECK-NEXT: mv a2, a3
; CHECK-NEXT: mv a3, a4
; CHECK-NEXT: mv a4, a5
; CHECK-NEXT: mv a5, a6
; CHECK-NEXT: mv a6, a7
; CHECK-NEXT: jr t1
%9 = tail call i32 %0(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7)
ret i32 %9
}
; Do not tail call optimize functions with varargs passed by stack.
declare i32 @callee_varargs(i32, ...)
define void @caller_varargs(i32 %a, i32 %b) nounwind {