forked from OSchip/llvm-project
R600: Recommit 199842: Add work-around for the CF stack entry HW bug
The unit test is now disabled on non-asserts builds. The CF stack can be corrupted if you use CF_ALU_PUSH_BEFORE, CF_ALU_ELSE_AFTER, CF_ALU_BREAK, or CF_ALU_CONTINUE when the number of sub-entries on the stack is greater than or equal to the stack entry size and sub-entries modulo 4 is either 0 or 3 (on cedar the bug is present when the number of sub-entries modulo 8 is either 7 or 0). We choose to be conservative and always apply the work-around when the number of sub-entries is greater than or equal to the stack entry size, so that we can safely over-allocate the stack when we are unsure of the stack allocation rules. reviewed-by: Vincent Lejeune <vljn at ovi.com> llvm-svn: 199905
This commit is contained in:
parent
04e2ecfda2
commit
348273df97
|
@ -63,6 +63,11 @@ def FeatureCaymanISA : SubtargetFeature<"caymanISA",
|
||||||
"true",
|
"true",
|
||||||
"Use Cayman ISA">;
|
"Use Cayman ISA">;
|
||||||
|
|
||||||
|
def FeatureCFALUBug : SubtargetFeature<"cfalubug",
|
||||||
|
"CFALUBug",
|
||||||
|
"true",
|
||||||
|
"GPU has CF_ALU bug">;
|
||||||
|
|
||||||
class SubtargetFeatureFetchLimit <string Value> :
|
class SubtargetFeatureFetchLimit <string Value> :
|
||||||
SubtargetFeature <"fetch"#Value,
|
SubtargetFeature <"fetch"#Value,
|
||||||
"TexVTXClauseSize",
|
"TexVTXClauseSize",
|
||||||
|
|
|
@ -39,6 +39,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) :
|
||||||
EnableIRStructurizer = true;
|
EnableIRStructurizer = true;
|
||||||
EnableIfCvt = true;
|
EnableIfCvt = true;
|
||||||
WavefrontSize = 0;
|
WavefrontSize = 0;
|
||||||
|
CFALUBug = false;
|
||||||
ParseSubtargetFeatures(GPU, FS);
|
ParseSubtargetFeatures(GPU, FS);
|
||||||
DevName = GPU;
|
DevName = GPU;
|
||||||
}
|
}
|
||||||
|
@ -97,6 +98,11 @@ AMDGPUSubtarget::getStackEntrySize() const {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
bool
|
bool
|
||||||
|
AMDGPUSubtarget::hasCFAluBug() const {
|
||||||
|
assert(getGeneration() <= NORTHERN_ISLANDS);
|
||||||
|
return CFALUBug;
|
||||||
|
}
|
||||||
|
bool
|
||||||
AMDGPUSubtarget::isTargetELF() const {
|
AMDGPUSubtarget::isTargetELF() const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -52,6 +52,7 @@ private:
|
||||||
bool EnableIRStructurizer;
|
bool EnableIRStructurizer;
|
||||||
bool EnableIfCvt;
|
bool EnableIfCvt;
|
||||||
unsigned WavefrontSize;
|
unsigned WavefrontSize;
|
||||||
|
bool CFALUBug;
|
||||||
|
|
||||||
InstrItineraryData InstrItins;
|
InstrItineraryData InstrItins;
|
||||||
|
|
||||||
|
@ -71,6 +72,7 @@ public:
|
||||||
bool isIfCvtEnabled() const;
|
bool isIfCvtEnabled() const;
|
||||||
unsigned getWavefrontSize() const;
|
unsigned getWavefrontSize() const;
|
||||||
unsigned getStackEntrySize() const;
|
unsigned getStackEntrySize() const;
|
||||||
|
bool hasCFAluBug() const;
|
||||||
|
|
||||||
virtual bool enableMachineScheduler() const {
|
virtual bool enableMachineScheduler() const {
|
||||||
return getGeneration() <= NORTHERN_ISLANDS;
|
return getGeneration() <= NORTHERN_ISLANDS;
|
||||||
|
|
|
@ -46,13 +46,15 @@ def : Proc<"rv770", R600_VLIW5_Itin,
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
def : Proc<"cedar", R600_VLIW5_Itin,
|
def : Proc<"cedar", R600_VLIW5_Itin,
|
||||||
[FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32]>;
|
[FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize32,
|
||||||
|
FeatureCFALUBug]>;
|
||||||
|
|
||||||
def : Proc<"redwood", R600_VLIW5_Itin,
|
def : Proc<"redwood", R600_VLIW5_Itin,
|
||||||
[FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
|
[FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64,
|
||||||
|
FeatureCFALUBug]>;
|
||||||
|
|
||||||
def : Proc<"sumo", R600_VLIW5_Itin,
|
def : Proc<"sumo", R600_VLIW5_Itin,
|
||||||
[FeatureEvergreen, FeatureWavefrontSize64]>;
|
[FeatureEvergreen, FeatureWavefrontSize64, FeatureCFALUBug]>;
|
||||||
|
|
||||||
def : Proc<"juniper", R600_VLIW5_Itin,
|
def : Proc<"juniper", R600_VLIW5_Itin,
|
||||||
[FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
|
[FeatureEvergreen, FeatureVertexCache, FeatureWavefrontSize64]>;
|
||||||
|
@ -66,13 +68,13 @@ def : Proc<"cypress", R600_VLIW5_Itin,
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
def : Proc<"barts", R600_VLIW5_Itin,
|
def : Proc<"barts", R600_VLIW5_Itin,
|
||||||
[FeatureNorthernIslands, FeatureVertexCache]>;
|
[FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
|
||||||
|
|
||||||
def : Proc<"turks", R600_VLIW5_Itin,
|
def : Proc<"turks", R600_VLIW5_Itin,
|
||||||
[FeatureNorthernIslands, FeatureVertexCache]>;
|
[FeatureNorthernIslands, FeatureVertexCache, FeatureCFALUBug]>;
|
||||||
|
|
||||||
def : Proc<"caicos", R600_VLIW5_Itin,
|
def : Proc<"caicos", R600_VLIW5_Itin,
|
||||||
[FeatureNorthernIslands]>;
|
[FeatureNorthernIslands, FeatureCFALUBug]>;
|
||||||
|
|
||||||
def : Proc<"cayman", R600_VLIW4_Itin,
|
def : Proc<"cayman", R600_VLIW4_Itin,
|
||||||
[FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA]>;
|
[FeatureNorthernIslands, FeatureFP64, FeatureCaymanISA]>;
|
||||||
|
|
|
@ -73,6 +73,44 @@ bool CFStack::branchStackContains(CFStack::StackItem Item) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
|
||||||
|
if (Opcode == AMDGPU::CF_ALU_PUSH_BEFORE && ST.hasCaymanISA() &&
|
||||||
|
getLoopDepth() > 1)
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (!ST.hasCFAluBug())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
switch(Opcode) {
|
||||||
|
default: return false;
|
||||||
|
case AMDGPU::CF_ALU_PUSH_BEFORE:
|
||||||
|
case AMDGPU::CF_ALU_ELSE_AFTER:
|
||||||
|
case AMDGPU::CF_ALU_BREAK:
|
||||||
|
case AMDGPU::CF_ALU_CONTINUE:
|
||||||
|
if (CurrentSubEntries == 0)
|
||||||
|
return false;
|
||||||
|
if (ST.getWavefrontSize() == 64) {
|
||||||
|
// We are being conservative here. We only require this work-around if
|
||||||
|
// CurrentSubEntries > 3 &&
|
||||||
|
// (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
|
||||||
|
//
|
||||||
|
// We have to be conservative, because we don't know for certain that
|
||||||
|
// our stack allocation algorithm for Evergreen/NI is correct. Applying this
|
||||||
|
// work-around when CurrentSubEntries > 3 allows us to over-allocate stack
|
||||||
|
// resources without any problems.
|
||||||
|
return CurrentSubEntries > 3;
|
||||||
|
} else {
|
||||||
|
assert(ST.getWavefrontSize() == 32);
|
||||||
|
// We are being conservative here. We only require the work-around if
|
||||||
|
// CurrentSubEntries > 7 &&
|
||||||
|
// (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
|
||||||
|
// See the comment on the wavefront size == 64 case for why we are
|
||||||
|
// being conservative.
|
||||||
|
return CurrentSubEntries > 7;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
|
unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
|
||||||
switch(Item) {
|
switch(Item) {
|
||||||
default:
|
default:
|
||||||
|
@ -472,9 +510,12 @@ public:
|
||||||
if (MI->getOpcode() == AMDGPU::CF_ALU)
|
if (MI->getOpcode() == AMDGPU::CF_ALU)
|
||||||
LastAlu.back() = MI;
|
LastAlu.back() = MI;
|
||||||
I++;
|
I++;
|
||||||
|
bool RequiresWorkAround =
|
||||||
|
CFStack.requiresWorkAroundForInst(MI->getOpcode());
|
||||||
switch (MI->getOpcode()) {
|
switch (MI->getOpcode()) {
|
||||||
case AMDGPU::CF_ALU_PUSH_BEFORE:
|
case AMDGPU::CF_ALU_PUSH_BEFORE:
|
||||||
if (ST.hasCaymanISA() && CFStack.getLoopDepth() > 1) {
|
if (RequiresWorkAround) {
|
||||||
|
DEBUG(dbgs() << "Applying bug work-around for ALU_PUSH_BEFORE\n");
|
||||||
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
|
BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(AMDGPU::CF_PUSH_EG))
|
||||||
.addImm(CfCount + 1)
|
.addImm(CfCount + 1)
|
||||||
.addImm(1);
|
.addImm(1);
|
||||||
|
|
|
@ -0,0 +1,227 @@
|
||||||
|
; RUN: llc -march=r600 -mcpu=redwood -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
|
||||||
|
; RUN: llc -march=r600 -mcpu=sumo -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
|
||||||
|
; RUN: llc -march=r600 -mcpu=barts -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
|
||||||
|
; RUN: llc -march=r600 -mcpu=turks -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
|
||||||
|
; RUN: llc -march=r600 -mcpu=caicos -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG64 --check-prefix=FUNC
|
||||||
|
; RUN: llc -march=r600 -mcpu=cedar -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=BUG32 --check-prefix=FUNC
|
||||||
|
; RUN: llc -march=r600 -mcpu=juniper -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
|
||||||
|
; RUN: llc -march=r600 -mcpu=cypress -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
|
||||||
|
; RUN: llc -march=r600 -mcpu=cayman -debug-only=r600cf %s -o - 2>&1 | FileCheck %s --check-prefix=NOBUG --check-prefix=FUNC
|
||||||
|
|
||||||
|
; REQUIRES: asserts
|
||||||
|
|
||||||
|
; We are currently allocating 2 extra sub-entries on Evergreen / NI for
|
||||||
|
; non-WQM push instructions. If we change this to 1, then we will need to
|
||||||
|
; add one level of depth to each of these tests.
|
||||||
|
|
||||||
|
; BUG64-NOT: Applying bug work-around
|
||||||
|
; BUG32-NOT: Applying bug work-around
|
||||||
|
; NOBUG-NOT: Applying bug work-around
|
||||||
|
; FUNC-LABEL: @nested3
|
||||||
|
define void @nested3(i32 addrspace(1)* %out, i32 %cond) {
|
||||||
|
entry:
|
||||||
|
%0 = icmp sgt i32 %cond, 0
|
||||||
|
br i1 %0, label %if.1, label %end
|
||||||
|
|
||||||
|
if.1:
|
||||||
|
%1 = icmp sgt i32 %cond, 10
|
||||||
|
br i1 %1, label %if.2, label %if.store.1
|
||||||
|
|
||||||
|
if.store.1:
|
||||||
|
store i32 1, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.2:
|
||||||
|
%2 = icmp sgt i32 %cond, 20
|
||||||
|
br i1 %2, label %if.3, label %if.2.store
|
||||||
|
|
||||||
|
if.2.store:
|
||||||
|
store i32 2, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.3:
|
||||||
|
store i32 3, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; BUG64: Applying bug work-around
|
||||||
|
; BUG32-NOT: Applying bug work-around
|
||||||
|
; NOBUG-NOT: Applying bug work-around
|
||||||
|
; FUNC-LABEL: @nested4
|
||||||
|
define void @nested4(i32 addrspace(1)* %out, i32 %cond) {
|
||||||
|
entry:
|
||||||
|
%0 = icmp sgt i32 %cond, 0
|
||||||
|
br i1 %0, label %if.1, label %end
|
||||||
|
|
||||||
|
if.1:
|
||||||
|
%1 = icmp sgt i32 %cond, 10
|
||||||
|
br i1 %1, label %if.2, label %if.1.store
|
||||||
|
|
||||||
|
if.1.store:
|
||||||
|
store i32 1, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.2:
|
||||||
|
%2 = icmp sgt i32 %cond, 20
|
||||||
|
br i1 %2, label %if.3, label %if.2.store
|
||||||
|
|
||||||
|
if.2.store:
|
||||||
|
store i32 2, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.3:
|
||||||
|
%3 = icmp sgt i32 %cond, 30
|
||||||
|
br i1 %3, label %if.4, label %if.3.store
|
||||||
|
|
||||||
|
if.3.store:
|
||||||
|
store i32 3, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.4:
|
||||||
|
store i32 4, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; BUG64: Applying bug work-around
|
||||||
|
; BUG32-NOT: Applying bug work-around
|
||||||
|
; NOBUG-NOT: Applying bug work-around
|
||||||
|
; FUNC-LABEL: @nested7
|
||||||
|
define void @nested7(i32 addrspace(1)* %out, i32 %cond) {
|
||||||
|
entry:
|
||||||
|
%0 = icmp sgt i32 %cond, 0
|
||||||
|
br i1 %0, label %if.1, label %end
|
||||||
|
|
||||||
|
if.1:
|
||||||
|
%1 = icmp sgt i32 %cond, 10
|
||||||
|
br i1 %1, label %if.2, label %if.1.store
|
||||||
|
|
||||||
|
if.1.store:
|
||||||
|
store i32 1, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.2:
|
||||||
|
%2 = icmp sgt i32 %cond, 20
|
||||||
|
br i1 %2, label %if.3, label %if.2.store
|
||||||
|
|
||||||
|
if.2.store:
|
||||||
|
store i32 2, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.3:
|
||||||
|
%3 = icmp sgt i32 %cond, 30
|
||||||
|
br i1 %3, label %if.4, label %if.3.store
|
||||||
|
|
||||||
|
if.3.store:
|
||||||
|
store i32 3, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.4:
|
||||||
|
%4 = icmp sgt i32 %cond, 40
|
||||||
|
br i1 %4, label %if.5, label %if.4.store
|
||||||
|
|
||||||
|
if.4.store:
|
||||||
|
store i32 4, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.5:
|
||||||
|
%5 = icmp sgt i32 %cond, 50
|
||||||
|
br i1 %5, label %if.6, label %if.5.store
|
||||||
|
|
||||||
|
if.5.store:
|
||||||
|
store i32 5, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.6:
|
||||||
|
%6 = icmp sgt i32 %cond, 60
|
||||||
|
br i1 %6, label %if.7, label %if.6.store
|
||||||
|
|
||||||
|
if.6.store:
|
||||||
|
store i32 6, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.7:
|
||||||
|
store i32 7, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
end:
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; BUG64: Applying bug work-around
|
||||||
|
; BUG32: Applying bug work-around
|
||||||
|
; NOBUG-NOT: Applying bug work-around
|
||||||
|
; FUNC-LABEL: @nested8
|
||||||
|
define void @nested8(i32 addrspace(1)* %out, i32 %cond) {
|
||||||
|
entry:
|
||||||
|
%0 = icmp sgt i32 %cond, 0
|
||||||
|
br i1 %0, label %if.1, label %end
|
||||||
|
|
||||||
|
if.1:
|
||||||
|
%1 = icmp sgt i32 %cond, 10
|
||||||
|
br i1 %1, label %if.2, label %if.1.store
|
||||||
|
|
||||||
|
if.1.store:
|
||||||
|
store i32 1, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.2:
|
||||||
|
%2 = icmp sgt i32 %cond, 20
|
||||||
|
br i1 %2, label %if.3, label %if.2.store
|
||||||
|
|
||||||
|
if.2.store:
|
||||||
|
store i32 2, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.3:
|
||||||
|
%3 = icmp sgt i32 %cond, 30
|
||||||
|
br i1 %3, label %if.4, label %if.3.store
|
||||||
|
|
||||||
|
if.3.store:
|
||||||
|
store i32 3, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.4:
|
||||||
|
%4 = icmp sgt i32 %cond, 40
|
||||||
|
br i1 %4, label %if.5, label %if.4.store
|
||||||
|
|
||||||
|
if.4.store:
|
||||||
|
store i32 4, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.5:
|
||||||
|
%5 = icmp sgt i32 %cond, 50
|
||||||
|
br i1 %5, label %if.6, label %if.5.store
|
||||||
|
|
||||||
|
if.5.store:
|
||||||
|
store i32 5, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.6:
|
||||||
|
%6 = icmp sgt i32 %cond, 60
|
||||||
|
br i1 %6, label %if.7, label %if.6.store
|
||||||
|
|
||||||
|
if.6.store:
|
||||||
|
store i32 6, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.7:
|
||||||
|
%7 = icmp sgt i32 %cond, 70
|
||||||
|
br i1 %7, label %if.8, label %if.7.store
|
||||||
|
|
||||||
|
if.7.store:
|
||||||
|
store i32 7, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
if.8:
|
||||||
|
store i32 8, i32 addrspace(1)* %out
|
||||||
|
br label %end
|
||||||
|
|
||||||
|
end:
|
||||||
|
ret void
|
||||||
|
}
|
Loading…
Reference in New Issue