PowerPC: Fix SPE extloadf32 handling.

The f64 extloadf32/fpextend selection patterns for SPE were incorrect
copies of the FPU patterns, and are unnecessary because SPE has no
extending load.  Instead, mark the f32-to-f64 EXTLOAD as Expand and let
LLVM legalize it.

Reviewed By: #powerpc, lkail
Differential Revision: https://reviews.llvm.org/D78670
This commit is contained in:
Justin Hibbits 2020-04-16 09:42:39 -05:00
parent 605fd4d77c
commit 914dbf4808
3 changed files with 64 additions and 10 deletions

View File

@ -339,6 +339,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FMA , MVT::f32, Legal);
}
if (Subtarget.hasSPE())
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
// If we're enabling GP optimizations, use hardware square root

View File

@ -819,16 +819,6 @@ def SPESTWX : XForm_8<31, 151, (outs), (ins spe4rc:$rS, memrr:$dst),
} // HasSPE
// Selection patterns, active only under the HasSPE predicate, that produce an
// f64 value from an f32 source.
// NOTE(review): the commit description at the top of this page calls these
// incorrect copies of the FPU patterns; confirm before reusing them.
let Predicates = [HasSPE] in {
// f64 extending load of an f32 via an iaddr-form address: selected as SPELWZ
// with the result copied into the SPERC register class.
def : Pat<(f64 (extloadf32 iaddr:$src)),
(COPY_TO_REGCLASS (SPELWZ iaddr:$src), SPERC)>;
// Same as above for an xaddr-form address, using SPELWZX.
def : Pat<(f64 (extloadf32 xaddr:$src)),
(COPY_TO_REGCLASS (SPELWZX xaddr:$src), SPERC)>;
// fpextend f32 -> f64: selected as a bare register-class copy into SPERC,
// with no conversion instruction in the output pattern.
def : Pat<(f64 (fpextend f32:$src)),
(COPY_TO_REGCLASS $src, SPERC)>;
}
let Predicates = [HasSPE] in {
def SELECT_CC_SPE4 : PPCCustomInserterPseudo<(outs spe4rc:$dst),
(ins crrc:$cond, spe4rc:$T, spe4rc:$F,

View File

@ -1422,3 +1422,64 @@ for.body: ; preds = %for.body, %entry
declare float @llvm.fma.f32(float, float, float) #1
attributes #1 = { nounwind readnone speculatable willreturn }
%struct.a = type { float, float }
; Test function @d: loads two floats, extends each to double, and passes each
; double to an indirect call through an undef callee.  The result of the first
; call is converted to double, multiplied by the extended value loaded through
; %f, truncated back to float and stored.  The %e argument is not used.
; The expected assembly below loads both floats with plain lwz and converts
; them with efdcfs.
; NOTE(review): the expected-assembly lines look auto-generated (presumably by
; update_llc_test_checks.py); regenerate them rather than hand-editing -- TODO
; confirm against the head of the test file.
define void @d(%struct.a* %e, %struct.a* %f) {
; CHECK-LABEL: d:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr 0
; CHECK-NEXT: stw 0, 4(1)
; CHECK-NEXT: stwu 1, -48(1)
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 4
; CHECK-NEXT: .cfi_offset r29, -12
; CHECK-NEXT: .cfi_offset r30, -8
; CHECK-NEXT: .cfi_offset r29, -40
; CHECK-NEXT: .cfi_offset r30, -32
; CHECK-NEXT: lwz 4, 0(4)
; CHECK-NEXT: lwz 3, 0(3)
; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill
; CHECK-NEXT: evstdd 29, 8(1) # 8-byte Folded Spill
; CHECK-NEXT: efdcfs 29, 4
; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill
; CHECK-NEXT: mr 4, 29
; CHECK-NEXT: evstdd 30, 16(1) # 8-byte Folded Spill
; CHECK-NEXT: efdcfs 30, 3
; CHECK-NEXT: evmergehi 3, 29, 29
; CHECK-NEXT: mtctr 3
; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3
; CHECK-NEXT: bctrl
; CHECK-NEXT: evmergehi 3, 30, 30
; CHECK-NEXT: mr 4, 30
; CHECK-NEXT: mtctr 3
; CHECK-NEXT: # kill: def $r3 killed $r3 killed $s3
; CHECK-NEXT: bctrl
; CHECK-NEXT: li 3, .LCPI58_0@l
; CHECK-NEXT: lis 4, .LCPI58_0@ha
; CHECK-NEXT: evlddx 3, 4, 3
; CHECK-NEXT: evldd 30, 16(1) # 8-byte Folded Reload
; CHECK-NEXT: efdmul 3, 29, 3
; CHECK-NEXT: evldd 29, 8(1) # 8-byte Folded Reload
; CHECK-NEXT: efscfd 3, 3
; CHECK-NEXT: stw 3, 0(3)
; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload
; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload
; CHECK-NEXT: lwz 0, 52(1)
; CHECK-NEXT: addi 1, 1, 48
; CHECK-NEXT: mtlr 0
; CHECK-NEXT: blr
entry:
; Address of field 0 of the struct pointed to by %f.
%0 = getelementptr %struct.a, %struct.a* %f, i32 0, i32 0
; First float comes from an undef pointer and is widened to double.
%1 = load float, float* undef
%conv = fpext float %1 to double
; Second float comes from %f's first field and is widened to double.
%2 = load float, float* %0
%g = fpext float %2 to double
; Each widened value is passed to an indirect call through an undef callee;
; only the first call's result (%3) is used afterwards.
%3 = call i32 undef(double %g)
%h = call i32 undef(double %conv)
; Convert the first call's i32 result to double, multiply by %g, truncate the
; product to float and store it through an undef pointer.
%n = sitofp i32 %3 to double
%k = fmul double %g, %n
%l = fptrunc double %k to float
store float %l, float* undef
ret void
}