[RISCV] Honor the VT when converting floating-point register names to register classes for inline assembly.

It appears the code here was written for the inline asm clobbering
a specific register, but it also gets used for named input and
output registers.

For the input and output case, we should honor the VT so we
don't insert conversion instructions around the inline assembly.

For the clobber case, we need to pick the largest register class.

Reviewed By: asb, jrtc27

Differential Revision: https://reviews.llvm.org/D117279
This commit is contained in:
Craig Topper 2022-01-14 08:57:09 -08:00
parent a2261e399a
commit ac6b4896ea
3 changed files with 18 additions and 70 deletions

View File

@ -9969,12 +9969,18 @@ RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
.Default(RISCV::NoRegister);
if (FReg != RISCV::NoRegister) {
assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
if (Subtarget.hasStdExtD()) {
if (Subtarget.hasStdExtD() && (VT == MVT::f64 || VT == MVT::Other)) {
unsigned RegNo = FReg - RISCV::F0_F;
unsigned DReg = RISCV::F0_D + RegNo;
return std::make_pair(DReg, &RISCV::FPR64RegClass);
}
return std::make_pair(FReg, &RISCV::FPR32RegClass);
if (VT == MVT::f32 || VT == MVT::Other)
return std::make_pair(FReg, &RISCV::FPR32RegClass);
if (Subtarget.hasStdExtZfh() && VT == MVT::f16) {
unsigned RegNo = FReg - RISCV::F0_F;
unsigned HReg = RISCV::F0_H + RegNo;
return std::make_pair(HReg, &RISCV::FPR16RegClass);
}
}
}

View File

@ -4,9 +4,9 @@
; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64F %s
; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32D %s
; RUN: | FileCheck -check-prefix=RV32F %s
; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV64D %s
; RUN: | FileCheck -check-prefix=RV64F %s
@gf = external global float
@ -32,28 +32,6 @@ define float @constraint_f_float(float %a) nounwind {
; RV64F-NEXT: #NO_APP
; RV64F-NEXT: fmv.x.w a0, ft0
; RV64F-NEXT: ret
;
; RV32D-LABEL: constraint_f_float:
; RV32D: # %bb.0:
; RV32D-NEXT: lui a1, %hi(gf)
; RV32D-NEXT: flw ft0, %lo(gf)(a1)
; RV32D-NEXT: fmv.w.x ft1, a0
; RV32D-NEXT: #APP
; RV32D-NEXT: fadd.s ft0, ft1, ft0
; RV32D-NEXT: #NO_APP
; RV32D-NEXT: fmv.x.w a0, ft0
; RV32D-NEXT: ret
;
; RV64D-LABEL: constraint_f_float:
; RV64D: # %bb.0:
; RV64D-NEXT: lui a1, %hi(gf)
; RV64D-NEXT: flw ft0, %lo(gf)(a1)
; RV64D-NEXT: fmv.w.x ft1, a0
; RV64D-NEXT: #APP
; RV64D-NEXT: fadd.s ft0, ft1, ft0
; RV64D-NEXT: #NO_APP
; RV64D-NEXT: fmv.x.w a0, ft0
; RV64D-NEXT: ret
%1 = load float, float* @gf
%2 = tail call float asm "fadd.s $0, $1, $2", "=f,f,f"(float %a, float %1)
ret float %2
@ -81,34 +59,6 @@ define float @constraint_f_float_abi_name(float %a) nounwind {
; RV64F-NEXT: #NO_APP
; RV64F-NEXT: fmv.x.w a0, ft0
; RV64F-NEXT: ret
;
; RV32D-LABEL: constraint_f_float_abi_name:
; RV32D: # %bb.0:
; RV32D-NEXT: lui a1, %hi(gf)
; RV32D-NEXT: flw ft0, %lo(gf)(a1)
; RV32D-NEXT: fmv.w.x ft1, a0
; RV32D-NEXT: fcvt.d.s fa0, ft1
; RV32D-NEXT: fcvt.d.s fs0, ft0
; RV32D-NEXT: #APP
; RV32D-NEXT: fadd.s ft0, fa0, fs0
; RV32D-NEXT: #NO_APP
; RV32D-NEXT: fcvt.s.d ft0, ft0
; RV32D-NEXT: fmv.x.w a0, ft0
; RV32D-NEXT: ret
;
; RV64D-LABEL: constraint_f_float_abi_name:
; RV64D: # %bb.0:
; RV64D-NEXT: lui a1, %hi(gf)
; RV64D-NEXT: flw ft0, %lo(gf)(a1)
; RV64D-NEXT: fmv.w.x ft1, a0
; RV64D-NEXT: fcvt.d.s fa0, ft1
; RV64D-NEXT: fcvt.d.s fs0, ft0
; RV64D-NEXT: #APP
; RV64D-NEXT: fadd.s ft0, fa0, fs0
; RV64D-NEXT: #NO_APP
; RV64D-NEXT: fcvt.s.d ft0, ft0
; RV64D-NEXT: fmv.x.w a0, ft0
; RV64D-NEXT: ret
%1 = load float, float* @gf
%2 = tail call float asm "fadd.s $0, $1, $2", "={ft0},{fa0},{fs0}"(float %a, float %1)
ret float %2

View File

@ -57,13 +57,11 @@ define half @constraint_f_half_abi_name(half %a) nounwind {
; RV32ZFH-NEXT: addi sp, sp, -16
; RV32ZFH-NEXT: fsw fs0, 12(sp) # 4-byte Folded Spill
; RV32ZFH-NEXT: lui a0, %hi(gh)
; RV32ZFH-NEXT: flh ft0, %lo(gh)(a0)
; RV32ZFH-NEXT: fcvt.s.h fa0, fa0
; RV32ZFH-NEXT: fcvt.s.h fs0, ft0
; RV32ZFH-NEXT: flh fs0, %lo(gh)(a0)
; RV32ZFH-NEXT: #APP
; RV32ZFH-NEXT: fadd.s ft0, fa0, fs0
; RV32ZFH-NEXT: #NO_APP
; RV32ZFH-NEXT: fcvt.h.s fa0, ft0
; RV32ZFH-NEXT: fmv.h fa0, ft0
; RV32ZFH-NEXT: flw fs0, 12(sp) # 4-byte Folded Reload
; RV32ZFH-NEXT: addi sp, sp, 16
; RV32ZFH-NEXT: ret
@ -73,13 +71,11 @@ define half @constraint_f_half_abi_name(half %a) nounwind {
; RV64ZFH-NEXT: addi sp, sp, -16
; RV64ZFH-NEXT: fsw fs0, 12(sp) # 4-byte Folded Spill
; RV64ZFH-NEXT: lui a0, %hi(gh)
; RV64ZFH-NEXT: flh ft0, %lo(gh)(a0)
; RV64ZFH-NEXT: fcvt.s.h fa0, fa0
; RV64ZFH-NEXT: fcvt.s.h fs0, ft0
; RV64ZFH-NEXT: flh fs0, %lo(gh)(a0)
; RV64ZFH-NEXT: #APP
; RV64ZFH-NEXT: fadd.s ft0, fa0, fs0
; RV64ZFH-NEXT: #NO_APP
; RV64ZFH-NEXT: fcvt.h.s fa0, ft0
; RV64ZFH-NEXT: fmv.h fa0, ft0
; RV64ZFH-NEXT: flw fs0, 12(sp) # 4-byte Folded Reload
; RV64ZFH-NEXT: addi sp, sp, 16
; RV64ZFH-NEXT: ret
@ -89,13 +85,11 @@ define half @constraint_f_half_abi_name(half %a) nounwind {
; RV32DZFH-NEXT: addi sp, sp, -16
; RV32DZFH-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill
; RV32DZFH-NEXT: lui a0, %hi(gh)
; RV32DZFH-NEXT: flh ft0, %lo(gh)(a0)
; RV32DZFH-NEXT: fcvt.d.h fa0, fa0
; RV32DZFH-NEXT: fcvt.d.h fs0, ft0
; RV32DZFH-NEXT: flh fs0, %lo(gh)(a0)
; RV32DZFH-NEXT: #APP
; RV32DZFH-NEXT: fadd.s ft0, fa0, fs0
; RV32DZFH-NEXT: #NO_APP
; RV32DZFH-NEXT: fcvt.h.d fa0, ft0
; RV32DZFH-NEXT: fmv.h fa0, ft0
; RV32DZFH-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload
; RV32DZFH-NEXT: addi sp, sp, 16
; RV32DZFH-NEXT: ret
@ -105,13 +99,11 @@ define half @constraint_f_half_abi_name(half %a) nounwind {
; RV64DZFH-NEXT: addi sp, sp, -16
; RV64DZFH-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill
; RV64DZFH-NEXT: lui a0, %hi(gh)
; RV64DZFH-NEXT: flh ft0, %lo(gh)(a0)
; RV64DZFH-NEXT: fcvt.d.h fa0, fa0
; RV64DZFH-NEXT: fcvt.d.h fs0, ft0
; RV64DZFH-NEXT: flh fs0, %lo(gh)(a0)
; RV64DZFH-NEXT: #APP
; RV64DZFH-NEXT: fadd.s ft0, fa0, fs0
; RV64DZFH-NEXT: #NO_APP
; RV64DZFH-NEXT: fcvt.h.d fa0, ft0
; RV64DZFH-NEXT: fmv.h fa0, ft0
; RV64DZFH-NEXT: fld fs0, 8(sp) # 8-byte Folded Reload
; RV64DZFH-NEXT: addi sp, sp, 16
; RV64DZFH-NEXT: ret