[RISCV] Prevent use of t0(aka x5) as rs1 for jalr instructions.

Some microarchitectures treat rs1=x1/x5 on jalr as a hint to pop the return-address stack. We should avoid using x5 on jalr instructions since we aren't using x5 as an alternate link register. Differential Revision: https://reviews.llvm.org/D105875
2021-07-12 23:53:40 -07:00 · 2021-07-12 23:53:40 -07:00 · 46e8970817
parent 2c47b8847e
commit 46e8970817
4 changed files with 81 additions and 8 deletions
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1002,12 +1002,12 @@ def PseudoBR : Pseudo<(outs), (ins simm21_lsb0_jal:$imm20), [(br bb:$imm20)]>,
               PseudoInstExpansion<(JAL X0, simm21_lsb0_jal:$imm20)>;

 let isBarrier = 1, isBranch = 1, isIndirectBranch = 1, isTerminator = 1 in
-def PseudoBRIND : Pseudo<(outs), (ins GPR:$rs1, simm12:$imm12), []>,
+def PseudoBRIND : Pseudo<(outs), (ins GPRJALR:$rs1, simm12:$imm12), []>,
                  PseudoInstExpansion<(JALR X0, GPR:$rs1, simm12:$imm12)>;

-def : Pat<(brind GPR:$rs1), (PseudoBRIND GPR:$rs1, 0)>;
-def : Pat<(brind (add GPR:$rs1, simm12:$imm12)),
-          (PseudoBRIND GPR:$rs1, simm12:$imm12)>;
+def : Pat<(brind GPRJALR:$rs1), (PseudoBRIND GPRJALR:$rs1, 0)>;
+def : Pat<(brind (add GPRJALR:$rs1, simm12:$imm12)),
+          (PseudoBRIND GPRJALR:$rs1, simm12:$imm12)>;

 // PseudoCALLReg is a generic pseudo instruction for calls which will eventually
 // expand to auipc and jalr while encoding, with any given register used as the
@@ -1039,8 +1039,8 @@ def : Pat<(riscv_sret_flag), (SRET X0, X0)>;
 def : Pat<(riscv_mret_flag), (MRET X0, X0)>;

 let isCall = 1, Defs = [X1] in
-def PseudoCALLIndirect : Pseudo<(outs), (ins GPR:$rs1),
-                                [(riscv_call GPR:$rs1)]>,
+def PseudoCALLIndirect : Pseudo<(outs), (ins GPRJALR:$rs1),
+                                [(riscv_call GPRJALR:$rs1)]>,
                         PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>;

 let isBarrier = 1, isReturn = 1, isTerminator = 1 in
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -162,6 +162,20 @@ def GPRNoX0X2 : RegisterClass<"RISCV", [XLenVT], 32, (add
  let RegInfos = XLenRI;
 }

+// Don't use X1 or X5 for JALR since that is a hint to pop the return address
+// stack on some microarchitectures. Also remove the reserved registers X0, X2,
+// X3, and X4 as it reduces the number of register classes that get synthesized
+// by tablegen.
+def GPRJALR : RegisterClass<"RISCV", [XLenVT], 32, (add
+    (sequence "X%u", 10, 17),
+    (sequence "X%u", 6, 7),
+    (sequence "X%u", 28, 31),
+    (sequence "X%u", 8, 9),
+    (sequence "X%u", 18, 27)
+  )> {
+  let RegInfos = XLenRI;
+}
+
 def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add
    (sequence "X%u", 10, 15),
    (sequence "X%u", 8, 9)
@@ -171,9 +185,10 @@ def GPRC : RegisterClass<"RISCV", [XLenVT], 32, (add

 // For indirect tail calls, we can't use callee-saved registers, as they are
 // restored to the saved value before the tail call, which would clobber a call
-// address.
+// address. We shouldn't use x5 since that is a hint to pop the return
+// address stack on some microarchitectures.
 def GPRTC : RegisterClass<"RISCV", [XLenVT], 32, (add
-    (sequence "X%u", 5, 7),
+    (sequence "X%u", 6, 7),
    (sequence "X%u", 10, 17),
    (sequence "X%u", 28, 31)
  )> {
--- a/llvm/test/CodeGen/RISCV/calls.ll
+++ b/llvm/test/CodeGen/RISCV/calls.ll
@@ -114,6 +114,46 @@ define i32 @test_call_indirect(i32 (i32)* %a, i32 %b) nounwind {
  ret i32 %1
 }

+; Make sure we don't use t0 as the source for jalr as that is a hint to pop the
+; return address stack on some microarchitectures.
+define i32 @test_call_indirect_no_t0(i32 (i32, i32, i32, i32, i32, i32, i32)* %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) nounwind {
+; RV32I-LABEL: test_call_indirect_no_t0:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv t1, a0
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    mv a2, a3
+; RV32I-NEXT:    mv a3, a4
+; RV32I-NEXT:    mv a4, a5
+; RV32I-NEXT:    mv a5, a6
+; RV32I-NEXT:    mv a6, a7
+; RV32I-NEXT:    jalr t1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32I-PIC-LABEL: test_call_indirect_no_t0:
+; RV32I-PIC:       # %bb.0:
+; RV32I-PIC-NEXT:    addi sp, sp, -16
+; RV32I-PIC-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-PIC-NEXT:    mv t1, a0
+; RV32I-PIC-NEXT:    mv a0, a1
+; RV32I-PIC-NEXT:    mv a1, a2
+; RV32I-PIC-NEXT:    mv a2, a3
+; RV32I-PIC-NEXT:    mv a3, a4
+; RV32I-PIC-NEXT:    mv a4, a5
+; RV32I-PIC-NEXT:    mv a5, a6
+; RV32I-PIC-NEXT:    mv a6, a7
+; RV32I-PIC-NEXT:    jalr t1
+; RV32I-PIC-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    addi sp, sp, 16
+; RV32I-PIC-NEXT:    ret
+  %1 = call i32 %a(i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
+  ret i32 %1
+}
+
 ; Ensure that calls to fastcc functions aren't rejected. Such calls may be
 ; introduced when compiling with optimisation.

--- a/llvm/test/CodeGen/RISCV/tail-calls.ll
+++ b/llvm/test/CodeGen/RISCV/tail-calls.ll
@@ -58,6 +58,24 @@ entry:
  ret void
 }

+; Make sure we don't use t0 as the source for jr as that is a hint to pop the
+; return address stack on some microarchitectures.
+define i32 @caller_indirect_no_t0(i32 (i32, i32, i32, i32, i32, i32, i32)* %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
+; CHECK-LABEL: caller_indirect_no_t0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    mv t1, a0
+; CHECK-NEXT:    mv a0, a1
+; CHECK-NEXT:    mv a1, a2
+; CHECK-NEXT:    mv a2, a3
+; CHECK-NEXT:    mv a3, a4
+; CHECK-NEXT:    mv a4, a5
+; CHECK-NEXT:    mv a5, a6
+; CHECK-NEXT:    mv a6, a7
+; CHECK-NEXT:    jr t1
+  %9 = tail call i32 %0(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7)
+  ret i32 %9
+}
+
 ; Do not tail call optimize functions with varargs passed by stack.
 declare i32 @callee_varargs(i32, ...)
 define void @caller_varargs(i32 %a, i32 %b) nounwind {