forked from OSchip/llvm-project
[PowerPC] Disable CTR loops containing operations on half-precision
On subtargets prior to Power9 (ISA 3.0), conversions to/from half precision are lowered to libcalls. Because such calls might use the CTR register, loops containing these operations are not valid candidates for hardware (CTR) loops. Fixes: https://bugs.llvm.org/show_bug.cgi?id=48519
This commit is contained in:
parent
5abfeccf10
commit
0a19fc3088
|
@ -629,6 +629,10 @@ bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
|
||||||
isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
|
isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
|
||||||
isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
|
isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
|
||||||
return true;
|
return true;
|
||||||
|
if (!ST->isISA3_0() &&
|
||||||
|
(CI->getSrcTy()->getScalarType()->isHalfTy() ||
|
||||||
|
CI->getDestTy()->getScalarType()->isHalfTy()))
|
||||||
|
return true;
|
||||||
} else if (isLargeIntegerTy(!TM.isPPC64(),
|
} else if (isLargeIntegerTy(!TM.isPPC64(),
|
||||||
J->getType()->getScalarType()) &&
|
J->getType()->getScalarType()) &&
|
||||||
(J->getOpcode() == Instruction::UDiv ||
|
(J->getOpcode() == Instruction::UDiv ||
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
|
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
|
||||||
|
; RUN: -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
|
||||||
|
define void @julia__typed_vcat_20() #0 {
|
||||||
|
; CHECK-LABEL: julia__typed_vcat_20:
|
||||||
|
; CHECK: # %bb.0: # %top
|
||||||
|
; CHECK-NEXT: mflr r0
|
||||||
|
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
|
||||||
|
; CHECK-NEXT: std r0, 16(r1)
|
||||||
|
; CHECK-NEXT: stdu r1, -48(r1)
|
||||||
|
; CHECK-NEXT: li r3, 1
|
||||||
|
; CHECK-NEXT: li r30, 0
|
||||||
|
; CHECK-NEXT: .p2align 4
|
||||||
|
; CHECK-NEXT: .LBB0_1: # %L139
|
||||||
|
; CHECK-NEXT: #
|
||||||
|
; CHECK-NEXT: addi r3, r3, -1
|
||||||
|
; CHECK-NEXT: mtfprd f0, r3
|
||||||
|
; CHECK-NEXT: xscvsxdsp f1, f0
|
||||||
|
; CHECK-NEXT: bl __gnu_f2h_ieee
|
||||||
|
; CHECK-NEXT: nop
|
||||||
|
; CHECK-NEXT: bl __gnu_h2f_ieee
|
||||||
|
; CHECK-NEXT: nop
|
||||||
|
; CHECK-NEXT: addi r30, r30, -1
|
||||||
|
; CHECK-NEXT: li r3, 0
|
||||||
|
; CHECK-NEXT: cmpldi r30, 0
|
||||||
|
; CHECK-NEXT: bne+ cr0, .LBB0_1
|
||||||
|
; CHECK-NEXT: # %bb.2: # %pass.1
|
||||||
|
; CHECK-NEXT: bl __gnu_f2h_ieee
|
||||||
|
; CHECK-NEXT: nop
|
||||||
|
; CHECK-NEXT: sth r3, 0(r3)
|
||||||
|
top:
|
||||||
|
%.sroa.6.0.copyload = load i64, i64 addrspace(11)* null, align 8
|
||||||
|
%0 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %.sroa.6.0.copyload, i64 0)
|
||||||
|
%1 = extractvalue { i64, i1 } %0, 0
|
||||||
|
br label %L139
|
||||||
|
|
||||||
|
L139: ; preds = %L139, %top
|
||||||
|
%value_phi21 = phi i64 [ %5, %L139 ], [ 1, %top ]
|
||||||
|
%value_phi23 = phi i64 [ 0, %L139 ], [ 1, %top ]
|
||||||
|
%2 = add nsw i64 %value_phi23, -1
|
||||||
|
%3 = add i64 %2, 0
|
||||||
|
%4 = sitofp i64 %3 to half
|
||||||
|
store half %4, half addrspace(13)* undef, align 2
|
||||||
|
%.not101.not = icmp eq i64 %value_phi21, 0
|
||||||
|
%5 = add i64 %value_phi21, 1
|
||||||
|
br i1 %.not101.not, label %pass.1, label %L139
|
||||||
|
|
||||||
|
pass.1: ; preds = %L139
|
||||||
|
unreachable
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: nounwind readnone speculatable willreturn
; NOTE(review): the attribute list in the comment above does not match group #0
; below, which only carries nounwind; presumably left over from test reduction.
|
||||||
|
declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) #0
|
||||||
|
|
||||||
|
attributes #0 = { nounwind }
|
Loading…
Reference in New Issue