forked from OSchip/llvm-project
[AMDGPU] Fix pointer info for pseudo source for r600
The pointer info for pseudo source values on r600 is incorrect when the alloca address space is not 0, which produces an invalid SDNode for the r600---amdgiz triple. This patch fixes that. Differential Revision: https://reviews.llvm.org/D39670 llvm-svn: 317861
This commit is contained in:
parent
9278019eb3
commit
920cc2f813
|
@ -1493,3 +1493,21 @@ void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
|
|||
FlagOp.setImm(InstFlags);
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the address space used for memory described by a pseudo source
/// value of the given \p Kind.
///
/// Stack and fixed-stack pseudo sources map to the private address space;
/// every other kind handled here (constant pool, GOT, jump table, call-entry
/// symbols and target-custom values) maps to the constant address space.
unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(
    PseudoSourceValue::PSVKind Kind) const {
  switch (Kind) {
  case PseudoSourceValue::Stack:
  case PseudoSourceValue::FixedStack:
    return AMDGPUASI.PRIVATE_ADDRESS;
  case PseudoSourceValue::ConstantPool:
  case PseudoSourceValue::GOT:
  case PseudoSourceValue::JumpTable:
  case PseudoSourceValue::GlobalValueCallEntry:
  case PseudoSourceValue::ExternalSymbolCallEntry:
  case PseudoSourceValue::TargetCustom:
    return AMDGPUASI.CONSTANT_ADDRESS;
  }

  // Only reached when Kind matches none of the cases above. llvm_unreachable
  // does not return, so no trailing return statement is needed after it.
  llvm_unreachable("Invalid pseudo source kind");
}
|
||||
|
|
|
@ -318,6 +318,9 @@ public:
|
|||
bool isRegisterLoad(const MachineInstr &MI) const {
|
||||
return get(MI.getOpcode()).TSFlags & R600InstrFlags::REGISTER_LOAD;
|
||||
}
|
||||
|
||||
unsigned getAddressSpaceForPseudoSourceKind(
|
||||
PseudoSourceValue::PSVKind Kind) const override;
|
||||
};
|
||||
|
||||
namespace AMDGPU {
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}constant_load_i1:
|
||||
; GCN: buffer_load_ubyte
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}global_load_i1:
|
||||
; GCN: buffer_load_ubyte
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
|
||||
|
||||
; FUNC-LABEL: {{^}}local_load_i1:
|
||||
; GCN: ds_read_u8
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
|
||||
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
|
||||
; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
|
||||
target datalayout = "A5"
|
||||
|
||||
; OPT-LABEL: @vector_read(
|
||||
; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
|
||||
|
@ -17,17 +18,17 @@
|
|||
; EG: MOVA_INT
|
||||
define amdgpu_kernel void @vector_read(i32 addrspace(1)* %out, i32 %index) {
|
||||
entry:
|
||||
%tmp = alloca [4 x i32]
|
||||
%x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
|
||||
%y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
|
||||
%z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
|
||||
%w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
|
||||
store i32 0, i32* %x
|
||||
store i32 1, i32* %y
|
||||
store i32 2, i32* %z
|
||||
store i32 3, i32* %w
|
||||
%tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
|
||||
%tmp2 = load i32, i32* %tmp1
|
||||
%tmp = alloca [4 x i32], addrspace(5)
|
||||
%x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
|
||||
%y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
|
||||
%z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
|
||||
%w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
|
||||
store i32 0, i32 addrspace(5)* %x
|
||||
store i32 1, i32 addrspace(5)* %y
|
||||
store i32 2, i32 addrspace(5)* %z
|
||||
store i32 3, i32 addrspace(5)* %w
|
||||
%tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
|
||||
%tmp2 = load i32, i32 addrspace(5)* %tmp1
|
||||
store i32 %tmp2, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -46,19 +47,19 @@ entry:
|
|||
; EG: MOVA_INT
|
||||
define amdgpu_kernel void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
|
||||
entry:
|
||||
%tmp = alloca [4 x i32]
|
||||
%x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
|
||||
%y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
|
||||
%z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
|
||||
%w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
|
||||
store i32 0, i32* %x
|
||||
store i32 0, i32* %y
|
||||
store i32 0, i32* %z
|
||||
store i32 0, i32* %w
|
||||
%tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %w_index
|
||||
store i32 1, i32* %tmp1
|
||||
%tmp2 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %r_index
|
||||
%tmp3 = load i32, i32* %tmp2
|
||||
%tmp = alloca [4 x i32], addrspace(5)
|
||||
%x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
|
||||
%y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
|
||||
%z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
|
||||
%w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
|
||||
store i32 0, i32 addrspace(5)* %x
|
||||
store i32 0, i32 addrspace(5)* %y
|
||||
store i32 0, i32 addrspace(5)* %z
|
||||
store i32 0, i32 addrspace(5)* %w
|
||||
%tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %w_index
|
||||
store i32 1, i32 addrspace(5)* %tmp1
|
||||
%tmp2 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %r_index
|
||||
%tmp3 = load i32, i32 addrspace(5)* %tmp2
|
||||
store i32 %tmp3, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -73,19 +74,19 @@ entry:
|
|||
; EG: STORE_RAW
|
||||
define amdgpu_kernel void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
|
||||
entry:
|
||||
%tmp = alloca [4 x i32]
|
||||
%x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
|
||||
%y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
|
||||
%z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
|
||||
%w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
|
||||
store i32 0, i32* %x
|
||||
store i32 0, i32* %y
|
||||
store i32 0, i32* %z
|
||||
store i32 0, i32* %w
|
||||
%tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
|
||||
%tmp2 = bitcast i32* %tmp1 to [4 x i32]*
|
||||
%tmp3 = getelementptr [4 x i32], [4 x i32]* %tmp2, i32 0, i32 0
|
||||
%tmp4 = load i32, i32* %tmp3
|
||||
%tmp = alloca [4 x i32], addrspace(5)
|
||||
%x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
|
||||
%y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
|
||||
%z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
|
||||
%w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
|
||||
store i32 0, i32 addrspace(5)* %x
|
||||
store i32 0, i32 addrspace(5)* %y
|
||||
store i32 0, i32 addrspace(5)* %z
|
||||
store i32 0, i32 addrspace(5)* %w
|
||||
%tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
|
||||
%tmp2 = bitcast i32 addrspace(5)* %tmp1 to [4 x i32] addrspace(5)*
|
||||
%tmp3 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp2, i32 0, i32 0
|
||||
%tmp4 = load i32, i32 addrspace(5)* %tmp3
|
||||
store i32 %tmp4, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -95,18 +96,18 @@ entry:
|
|||
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
|
||||
define amdgpu_kernel void @vector_read_bitcast_gep(i32 addrspace(1)* %out, i32 %index) {
|
||||
entry:
|
||||
%tmp = alloca [4 x i32]
|
||||
%x = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
|
||||
%y = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
|
||||
%z = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
|
||||
%w = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
|
||||
%bc = bitcast i32* %x to float*
|
||||
store float 1.0, float* %bc
|
||||
store i32 1, i32* %y
|
||||
store i32 2, i32* %z
|
||||
store i32 3, i32* %w
|
||||
%tmp1 = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
|
||||
%tmp2 = load i32, i32* %tmp1
|
||||
%tmp = alloca [4 x i32], addrspace(5)
|
||||
%x = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
|
||||
%y = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
|
||||
%z = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
|
||||
%w = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
|
||||
%bc = bitcast i32 addrspace(5)* %x to float addrspace(5)*
|
||||
store float 1.0, float addrspace(5)* %bc
|
||||
store i32 1, i32 addrspace(5)* %y
|
||||
store i32 2, i32 addrspace(5)* %z
|
||||
store i32 3, i32 addrspace(5)* %w
|
||||
%tmp1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
|
||||
%tmp2 = load i32, i32 addrspace(5)* %tmp1
|
||||
store i32 %tmp2, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -123,18 +124,18 @@ entry:
|
|||
; OPT: load float
|
||||
define amdgpu_kernel void @vector_read_bitcast_alloca(float addrspace(1)* %out, i32 %index) {
|
||||
entry:
|
||||
%tmp = alloca [4 x i32]
|
||||
%tmp.bc = bitcast [4 x i32]* %tmp to [4 x float]*
|
||||
%x = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 0
|
||||
%y = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 1
|
||||
%z = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 2
|
||||
%w = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 3
|
||||
store float 0.0, float* %x
|
||||
store float 1.0, float* %y
|
||||
store float 2.0, float* %z
|
||||
store float 4.0, float* %w
|
||||
%tmp1 = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 %index
|
||||
%tmp2 = load float, float* %tmp1
|
||||
%tmp = alloca [4 x i32], addrspace(5)
|
||||
%tmp.bc = bitcast [4 x i32] addrspace(5)* %tmp to [4 x float] addrspace(5)*
|
||||
%x = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 0
|
||||
%y = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 1
|
||||
%z = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 2
|
||||
%w = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 3
|
||||
store float 0.0, float addrspace(5)* %x
|
||||
store float 1.0, float addrspace(5)* %y
|
||||
store float 2.0, float addrspace(5)* %z
|
||||
store float 4.0, float addrspace(5)* %w
|
||||
%tmp1 = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 %index
|
||||
%tmp2 = load float, float addrspace(5)* %tmp1
|
||||
store float %tmp2, float addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
@ -146,17 +147,17 @@ entry:
|
|||
; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
|
||||
define amdgpu_kernel void @vector_read_with_local_arg(i32 addrspace(3)* %stopper, i32 addrspace(1)* %out, i32 %index) {
|
||||
entry:
|
||||
%tmp = alloca [4 x i32]
|
||||
%x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
|
||||
%y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
|
||||
%z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
|
||||
%w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
|
||||
store i32 0, i32* %x
|
||||
store i32 1, i32* %y
|
||||
store i32 2, i32* %z
|
||||
store i32 3, i32* %w
|
||||
%tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
|
||||
%tmp2 = load i32, i32* %tmp1
|
||||
%tmp = alloca [4 x i32], addrspace(5)
|
||||
%x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
|
||||
%y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
|
||||
%z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
|
||||
%w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
|
||||
store i32 0, i32 addrspace(5)* %x
|
||||
store i32 1, i32 addrspace(5)* %y
|
||||
store i32 2, i32 addrspace(5)* %z
|
||||
store i32 3, i32 addrspace(5)* %w
|
||||
%tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
|
||||
%tmp2 = load i32, i32 addrspace(5)* %tmp1
|
||||
store i32 %tmp2, i32 addrspace(1)* %out
|
||||
ret void
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue