[PowerPC] Enhance the fast selection of fptoi & fptrunc instructions and clean up related asserts

Summary:
Fast selection of the LLVM fptoi & fptrunc instructions does not handle VSX
instruction support well.
We should use the VSX float-to-integer conversion instruction instead of the
non-VSX float-to-integer conversion instruction if the operand register class
is VSSRC or VSFRC, because i32 and i64 are mapped to VSSRC and VSFRC
respectively if the VSX feature is enabled.
For the float trunc instruction, we do similar work to the float-to-integer
conversion instruction and try to use the VSX instruction.

Reviewed By: jsji

Differential Revision: https://reviews.llvm.org/D58430

llvm-svn: 354762
This commit is contained in:
Kang Zhang 2019-02-25 02:46:16 +00:00
parent 25e690273a
commit 4faa4090c9
3 changed files with 50 additions and 10 deletions

View File

@ -987,12 +987,17 @@ bool PPCFastISel::SelectFPTrunc(const Instruction *I) {
// Round the result to single precision.
unsigned DestReg;
auto RC = MRI.getRegClass(SrcReg);
if (PPCSubTarget->hasSPE()) {
DestReg = createResultReg(&PPC::SPE4RCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::EFSCFD), DestReg)
.addReg(SrcReg);
} else if (isVSFRCRegClass(RC)) {
DestReg = createResultReg(&PPC::VSSRCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(PPC::XSRSP), DestReg)
.addReg(SrcReg);
} else {
DestReg = createResultReg(&PPC::F4RCRegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
@ -1207,16 +1212,19 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
if (SrcReg == 0)
return false;
// Convert f32 to f64 if necessary. This is just a meaningless copy
// to get the register class right.
// Convert f32 to f64 or convert VSSRC to VSFRC if necessary. This is just a
// meaningless copy to get the register class right.
const TargetRegisterClass *InRC = MRI.getRegClass(SrcReg);
if (InRC == &PPC::F4RCRegClass)
SrcReg = copyRegToRegClass(&PPC::F8RCRegClass, SrcReg);
else if (InRC == &PPC::VSSRCRegClass)
SrcReg = copyRegToRegClass(&PPC::VSFRCRegClass, SrcReg);
// Determine the opcode for the conversion, which takes place
// entirely within FPRs.
// entirely within FPRs or VSRs.
unsigned DestReg;
unsigned Opc;
auto RC = MRI.getRegClass(SrcReg);
if (PPCSubTarget->hasSPE()) {
DestReg = createResultReg(&PPC::GPRCRegClass);
@ -1224,6 +1232,12 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) {
Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTSIZ : PPC::EFDCTSIZ;
else
Opc = InRC == &PPC::SPE4RCRegClass ? PPC::EFSCTUIZ : PPC::EFDCTUIZ;
} else if (isVSFRCRegClass(RC)) {
DestReg = createResultReg(&PPC::VSFRCRegClass);
if (DstVT == MVT::i32)
Opc = IsSigned ? PPC::XSCVDPSXWS : PPC::XSCVDPUXWS;
else
Opc = IsSigned ? PPC::XSCVDPSXDS : PPC::XSCVDPUXDS;
} else {
DestReg = createResultReg(&PPC::F8RCRegClass);
if (DstVT == MVT::i32)

View File

@ -0,0 +1,13 @@
; RUN: llc -mcpu=generic -mtriple=powerpc64le-unknown-unknown -O0 < %s \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=GENERIC
; RUN: llc -mcpu=ppc -mtriple=powerpc64le-unknown-unknown -O0 < %s \
; RUN: -verify-machineinstrs | FileCheck %s
; Truncates a double argument to float with a single fptrunc and returns it.
; The -mcpu=ppc run (default CHECK prefix) expects the rounding to be emitted
; as frsp; the -mcpu=generic run (GENERIC prefix) expects xsrsp instead.
define float @testRSP(double %x) {
entry:
%0 = fptrunc double %x to float
ret float %0
; CHECK: frsp 1, 1
; GENERIC: xsrsp 1, 1
}

View File

@ -1,14 +1,27 @@
; RUN: llc -mcpu=generic -mtriple=powerpc64le-unknown-unknown -O0 < %s | FileCheck %s --check-prefix=GENERIC
; RUN: llc -mcpu=ppc -mtriple=powerpc64le-unknown-unknown -O0 < %s | FileCheck %s
; RUN: llc -mcpu=generic -mtriple=powerpc64le-unknown-unknown -O0 < %s \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefix=GENERIC
; RUN: llc -mcpu=ppc -mtriple=powerpc64le-unknown-unknown -O0 < %s \
; RUN: -verify-machineinstrs | FileCheck %s
define i32 @bad(double %x) {
%1 = fptoui double %x to i32
ret i32 %1
}
; CHECK: fctidz [[REG0:[0-9]+]], 1
; CHECK: stfd [[REG0]], [[OFF:.*]](1)
; CHECK: lwz {{[0-9]*}}, [[OFF]](1)
; GENERIC: fctiwuz [[REG0:[0-9]+]], 1
; GENERIC: stfd [[REG0]], [[OFF:.*]](1)
; GENERIC: lwz {{[0-9]*}}, [[OFF]](1)
; GENERIC: xscvdpuxws [[REG0:[0-9]+]], 1
; GENERIC: mfvsrwz {{[0-9]*}}, [[REG0]]
}
; Converts a float argument to a signed i32 with fptosi and returns it.
; The -mcpu=ppc run (default CHECK prefix) expects fctiwz followed by an
; stfd/lwa pair that uses the same offset to move the result into a GPR.
; The -mcpu=generic run (GENERIC prefix) expects xscvdpsxws followed by a
; mfvsrwz of the same register.
define i32 @bad1(float %x) {
entry:
%0 = fptosi float %x to i32
ret i32 %0
; CHECK: fctiwz [[REG0:[0-9]+]], 1
; CHECK: stfd [[REG0]], [[OFF:.*]](1)
; CHECK: lwa {{[0-9]*}}, [[OFF]](1)
; GENERIC: xscvdpsxws [[REG0:[0-9]+]], 1
; GENERIC: mfvsrwz {{[0-9]*}}, [[REG0]]
}