forked from OSchip/llvm-project
[IVDescriptor] Get the exact FP instruction that does not allow reordering
This is a bugfix in IVDescriptor.cpp. The helper function `RecurrenceDescriptor::getExactFPMathInst()` is supposed to return the first FP instruction that does not allow reordering. However, when constructing the RecurrenceDescriptor, we trace the use-def chain starting from a PHI node, and for each instruction in the use-def chain, its descriptor overrides the previous one. Therefore, in the final RecurrenceDescriptor we construct, we lose any earlier FP instructions that do not allow reordering. Reviewed By: kmclaughlin Differential Revision: https://reviews.llvm.org/D118073
This commit is contained in:
parent
2d566637f4
commit
f3e1f44340
|
@ -309,6 +309,10 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
|
|||
// flags from all the reduction operations.
|
||||
FastMathFlags FMF = FastMathFlags::getFast();
|
||||
|
||||
// The first instruction in the use-def chain of the Phi node that requires
|
||||
// exact floating point operations.
|
||||
Instruction *ExactFPMathInst = nullptr;
|
||||
|
||||
// A value in the reduction can be used:
|
||||
// - By the reduction:
|
||||
// - Reduction operation:
|
||||
|
@ -352,6 +356,9 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
|
|||
if (Cur != Start) {
|
||||
ReduxDesc =
|
||||
isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);
|
||||
ExactFPMathInst = ExactFPMathInst == nullptr
|
||||
? ReduxDesc.getExactFPMathInst()
|
||||
: ExactFPMathInst;
|
||||
if (!ReduxDesc.isRecurrence())
|
||||
return false;
|
||||
// FIXME: FMF is allowed on phi, but propagation is not handled correctly.
|
||||
|
@ -480,8 +487,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
|
|||
if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
|
||||
return false;
|
||||
|
||||
const bool IsOrdered = checkOrderedReduction(
|
||||
Kind, ReduxDesc.getExactFPMathInst(), ExitInstruction, Phi);
|
||||
const bool IsOrdered =
|
||||
checkOrderedReduction(Kind, ExactFPMathInst, ExitInstruction, Phi);
|
||||
|
||||
if (Start != Phi) {
|
||||
// If the starting value is not the same as the phi node, we speculatively
|
||||
|
@ -538,9 +545,8 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
|
|||
// is saved as part of the RecurrenceDescriptor.
|
||||
|
||||
// Save the description of this reduction variable.
|
||||
RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF,
|
||||
ReduxDesc.getExactFPMathInst(), RecurrenceType,
|
||||
IsSigned, IsOrdered, CastInsts,
|
||||
RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF, ExactFPMathInst,
|
||||
RecurrenceType, IsSigned, IsOrdered, CastInsts,
|
||||
MinWidthCastToRecurrenceType);
|
||||
RedDes = RD;
|
||||
|
||||
|
|
|
@ -585,6 +585,55 @@ for.end: ; preds = %for.body
|
|||
ret float %rdx
|
||||
}
|
||||
|
||||
; Negative test - loop contains two fadds and only one fadd has the fast flag,
|
||||
; which we cannot safely reorder.
|
||||
define float @fadd_multiple_one_flag(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) {
|
||||
; CHECK-ORDERED-LABEL: @fadd_multiple_one_flag
|
||||
; CHECK-ORDERED-NOT: vector.body
|
||||
|
||||
; CHECK-UNORDERED-LABEL: @fadd_multiple_one_flag
|
||||
; CHECK-UNORDERED: vector.body
|
||||
; CHECK-UNORDERED: %[[PHI:.*]] = phi <8 x float> [ <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %vector.ph ], [ %[[VEC_FADD2:.*]], %vector.body ]
|
||||
; CHECK-UNORDERED: %[[VEC_LOAD1:.*]] = load <8 x float>, <8 x float>
|
||||
; CHECK-UNORDERED: %[[VEC_FADD1:.*]] = fadd <8 x float> %[[PHI]], %[[VEC_LOAD1]]
|
||||
; CHECK-UNORDERED: %[[VEC_LOAD2:.*]] = load <8 x float>, <8 x float>
|
||||
; CHECK-UNORDERED: %[[VEC_FADD2]] = fadd fast <8 x float> %[[VEC_FADD1]], %[[VEC_LOAD2]]
|
||||
; CHECK-UNORDERED: middle.block
|
||||
; CHECK-UNORDERED: %[[RDX:.*]] = call float @llvm.vector.reduce.fadd.v8f32(float -0.000000e+00, <8 x float> %[[VEC_FADD2]])
|
||||
; CHECK-UNORDERED: for.body
|
||||
; CHECK-UNORDERED: %[[SUM:.*]] = phi float [ %bc.merge.rdx, %scalar.ph ], [ %[[FADD2:.*]], %for.body ]
|
||||
; CHECK-UNORDERED: %[[LOAD1:.*]] = load float, float*
|
||||
; CHECK-UNORDERED: %[[FADD1:.*]] = fadd float %sum, %[[LOAD1]]
|
||||
; CHECK-UNORDERED: %[[LOAD2:.*]] = load float, float*
|
||||
; CHECK-UNORDERED: %[[FADD2]] = fadd fast float %[[FADD1]], %[[LOAD2]]
|
||||
; CHECK-UNORDERED: for.end
|
||||
; CHECK-UNORDERED: %[[RET:.*]] = phi float [ %[[FADD2]], %for.body ], [ %[[RDX]], %middle.block ]
|
||||
; CHECK-UNORDERED: ret float %[[RET]]
|
||||
|
||||
; CHECK-NOT-VECTORIZED-LABEL: @fadd_multiple_one_flag
|
||||
; CHECK-NOT-VECTORIZED-NOT: vector.body
|
||||
|
||||
entry:
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %entry, %for.body
|
||||
%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
|
||||
%sum = phi float [ -0.000000e+00, %entry ], [ %add3, %for.body ]
|
||||
%arrayidx = getelementptr inbounds float, float* %a, i64 %iv
|
||||
%0 = load float, float* %arrayidx, align 4
|
||||
%add = fadd float %sum, %0
|
||||
%arrayidx2 = getelementptr inbounds float, float* %b, i64 %iv
|
||||
%1 = load float, float* %arrayidx2, align 4
|
||||
%add3 = fadd fast float %add, %1
|
||||
%iv.next = add nuw nsw i64 %iv, 1
|
||||
%exitcond.not = icmp eq i64 %iv.next, %n
|
||||
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
%rdx = phi float [ %add3, %for.body ]
|
||||
ret float %rdx
|
||||
}
|
||||
|
||||
; Tests with both a floating point reduction & induction, e.g.
|
||||
;
|
||||
;float fp_iv_rdx_loop(float *values, float init, float * __restrict__ A, int N) {
|
||||
|
|
Loading…
Reference in New Issue