forked from OSchip/llvm-project
Ignore PHI-defs for -new-coalescer interference checks.
A PHI can't create interference on its own. If two live ranges interfere at a PHI, they must also interfere when leaving one of the PHI predecessors.
llvm-svn: 164330
This commit is contained in:
parent
09cd303655
commit
b8707faba3
|
@ -1402,7 +1402,6 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
|
|||
// values should be merged into one, but not into any preceding value.
|
||||
// The first value defined or visited gets CR_Keep, the other gets CR_Merge.
|
||||
if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
|
||||
DEBUG(dbgs() << "\t\tDouble def: " << VNI->def << '\n');
|
||||
assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
|
||||
|
||||
// One value stays, the other is merged. Keep the earlier one, or the first
|
||||
|
@ -1420,7 +1419,11 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
|
|||
// Keep this value, check for conflicts when analyzing OtherVNI.
|
||||
if (!OtherV.isAnalyzed())
|
||||
return CR_Keep;
|
||||
- // Both sides have been analyzed now. Do they conflict?
|
||||
+ // Both sides have been analyzed now.
|
||||
+ // Allow overlapping PHI values. Any real interference would show up in a
|
||||
+ // predecessor, the PHI itself can't introduce any conflicts.
|
||||
+ if (VNI->isPHIDef())
|
||||
+ return CR_Merge;
|
||||
if (V.ValidLanes & OtherV.ValidLanes)
|
||||
// Overlapping lanes can't be resolved.
|
||||
return CR_Impossible;
|
||||
|
@ -1441,9 +1444,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
|
|||
Other.computeAssignment(V.OtherVNI->id, *this);
|
||||
const Val &OtherV = Other.Vals[V.OtherVNI->id];
|
||||
|
||||
- // Don't attempt resolving PHI values for now.
|
||||
+ // Allow overlapping PHI values. Any real interference would show up in a
|
||||
+ // predecessor, the PHI itself can't introduce any conflicts.
|
||||
if (VNI->isPHIDef())
|
||||
- return CR_Impossible;
|
||||
+ return CR_Replace;
|
||||
|
||||
// Check for simple erasable conflicts.
|
||||
if (DefMI->isImplicitDef())
|
||||
|
|
|
@ -141,3 +141,51 @@ if.end: ; preds = %entry, %if.then
|
|||
tail call void @llvm.arm.neon.vst1.v2f32(i8* %0, <2 x float> %x.0, i32 4)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK: f5
|
||||
; Coalesce vector lanes through phis.
|
||||
; CHECK: vmov.f32 {{.*}}, #1.0
|
||||
; CHECK-NOT: vmov
|
||||
; CHECK-NOT: vorr
|
||||
; CHECK: %if.end
|
||||
; We may leave the last insertelement in the if.end block.
|
||||
; It is inserting the %add value into a dead lane, but %add causes interference
|
||||
; in the entry block, and we don't do dead lane checks across basic blocks.
|
||||
define void @f5(float* %p, float* %q) nounwind ssp {
|
||||
entry:
|
||||
%0 = bitcast float* %p to i8*
|
||||
%vld1 = tail call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* %0, i32 4)
|
||||
%vecext = extractelement <4 x float> %vld1, i32 0
|
||||
%vecext1 = extractelement <4 x float> %vld1, i32 1
|
||||
%vecext2 = extractelement <4 x float> %vld1, i32 2
|
||||
%vecext3 = extractelement <4 x float> %vld1, i32 3
|
||||
%add = fadd float %vecext3, 1.000000e+00
|
||||
%tobool = icmp eq float* %q, null
|
||||
br i1 %tobool, label %if.end, label %if.then
|
||||
|
||||
if.then: ; preds = %entry
|
||||
%arrayidx = getelementptr inbounds float* %q, i32 1
|
||||
%1 = load float* %arrayidx, align 4
|
||||
%add4 = fadd float %vecext, %1
|
||||
%2 = load float* %q, align 4
|
||||
%add6 = fadd float %vecext1, %2
|
||||
%arrayidx7 = getelementptr inbounds float* %q, i32 2
|
||||
%3 = load float* %arrayidx7, align 4
|
||||
%add8 = fadd float %vecext2, %3
|
||||
br label %if.end
|
||||
|
||||
if.end: ; preds = %entry, %if.then
|
||||
%a.0 = phi float [ %add4, %if.then ], [ %vecext, %entry ]
|
||||
%b.0 = phi float [ %add6, %if.then ], [ %vecext1, %entry ]
|
||||
%c.0 = phi float [ %add8, %if.then ], [ %vecext2, %entry ]
|
||||
%vecinit = insertelement <4 x float> undef, float %a.0, i32 0
|
||||
%vecinit9 = insertelement <4 x float> %vecinit, float %b.0, i32 1
|
||||
%vecinit10 = insertelement <4 x float> %vecinit9, float %c.0, i32 2
|
||||
%vecinit11 = insertelement <4 x float> %vecinit10, float %add, i32 3
|
||||
tail call void @llvm.arm.neon.vst1.v4f32(i8* %0, <4 x float> %vecinit11, i32 4)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*, i32) nounwind readonly
|
||||
|
||||
declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
|
||||
|
|
Loading…
Reference in New Issue