[AMDGPU] Fix pointer info for pseudo source for r600

The pointer info for pseudo sources on r600 is incorrect when the alloca address space is not 0, which causes an invalid SDNode for the r600---amdgiz triple. This patch fixes that. Differential Revision: https://reviews.llvm.org/D39670 llvm-svn: 317861
2017-11-10 01:53:24 +00:00 · 2017-11-10 01:53:24 +00:00 · 920cc2f813
parent 9278019eb3
commit 920cc2f813
6 changed files with 109 additions and 87 deletions
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@ -1493,3 +1493,21 @@ void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
    FlagOp.setImm(InstFlags);
  }
 }
+
+unsigned R600InstrInfo::getAddressSpaceForPseudoSourceKind(
+    PseudoSourceValue::PSVKind Kind) const {
+  switch (Kind) { // Map the pseudo source kind to an address space.
+  case PseudoSourceValue::Stack:
+  case PseudoSourceValue::FixedStack:
+    return AMDGPUASI.PRIVATE_ADDRESS; // Stack and FixedStack: private.
+  case PseudoSourceValue::ConstantPool:
+  case PseudoSourceValue::GOT:
+  case PseudoSourceValue::JumpTable:
+  case PseudoSourceValue::GlobalValueCallEntry:
+  case PseudoSourceValue::ExternalSymbolCallEntry:
+  case PseudoSourceValue::TargetCustom:
+    return AMDGPUASI.CONSTANT_ADDRESS; // Remaining kinds: constant space.
+  }
+  llvm_unreachable("Invalid pseudo source kind"); // No case matched Kind.
+  return AMDGPUASI.PRIVATE_ADDRESS; // NOTE(review): presumably dead code.
+}
--- a/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@ -318,6 +318,9 @@ public:
  bool isRegisterLoad(const MachineInstr &MI) const {
    return get(MI.getOpcode()).TSFlags & R600InstrFlags::REGISTER_LOAD;
  }
+
+  unsigned getAddressSpaceForPseudoSourceKind(
+      PseudoSourceValue::PSVKind Kind) const override;
 };

 namespace AMDGPU {
--- a/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-i1.ll
@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}constant_load_i1:
 ; GCN: buffer_load_ubyte
--- a/llvm/test/CodeGen/AMDGPU/load-global-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-global-i1.ll
@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}global_load_i1:
 ; GCN: buffer_load_ubyte
--- a/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-local-i1.ll
@ -1,6 +1,6 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

 ; FUNC-LABEL: {{^}}local_load_i1:
 ; GCN: ds_read_u8
--- a/llvm/test/CodeGen/AMDGPU/vector-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/vector-alloca.ll
@ -1,9 +1,10 @@
-; RUN: llc -march=amdgcn -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
-; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
-; RUN: opt -S -mtriple=amdgcn-- -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-ALLOCA -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=+promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefix=SI-PROMOTE -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck --check-prefix=EG -check-prefix=FUNC %s
+; RUN: opt -S -mtriple=amdgcn---amdgiz -amdgpu-promote-alloca -sroa -instcombine < %s | FileCheck -check-prefix=OPT %s
+target datalayout = "A5"

 ; OPT-LABEL: @vector_read(
 ; OPT: %0 = extractelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %index
@ -17,17 +18,17 @@
 ; EG: MOVA_INT
 define amdgpu_kernel void @vector_read(i32 addrspace(1)* %out, i32 %index) {
 entry:
-  %tmp = alloca [4 x i32]
-  %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
-  %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
-  %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
-  %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
-  store i32 0, i32* %x
-  store i32 1, i32* %y
-  store i32 2, i32* %z
-  store i32 3, i32* %w
-  %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
-  %tmp2 = load i32, i32* %tmp1
+  %tmp = alloca [4 x i32], addrspace(5)
+  %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
+  %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
+  %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
+  %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
+  store i32 0, i32 addrspace(5)* %x
+  store i32 1, i32 addrspace(5)* %y
+  store i32 2, i32 addrspace(5)* %z
+  store i32 3, i32 addrspace(5)* %w
+  %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
+  %tmp2 = load i32, i32 addrspace(5)* %tmp1
  store i32 %tmp2, i32 addrspace(1)* %out
  ret void
 }
@ -46,19 +47,19 @@ entry:
 ; EG: MOVA_INT
 define amdgpu_kernel void @vector_write(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
 entry:
-  %tmp = alloca [4 x i32]
-  %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
-  %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
-  %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
-  %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
-  store i32 0, i32* %x
-  store i32 0, i32* %y
-  store i32 0, i32* %z
-  store i32 0, i32* %w
-  %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %w_index
-  store i32 1, i32* %tmp1
-  %tmp2 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %r_index
-  %tmp3 = load i32, i32* %tmp2
+  %tmp = alloca [4 x i32], addrspace(5)
+  %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
+  %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
+  %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
+  %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
+  store i32 0, i32 addrspace(5)* %x
+  store i32 0, i32 addrspace(5)* %y
+  store i32 0, i32 addrspace(5)* %z
+  store i32 0, i32 addrspace(5)* %w
+  %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %w_index
+  store i32 1, i32 addrspace(5)* %tmp1
+  %tmp2 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %r_index
+  %tmp3 = load i32, i32 addrspace(5)* %tmp2
  store i32 %tmp3, i32 addrspace(1)* %out
  ret void
 }
@ -73,19 +74,19 @@ entry:
 ; EG: STORE_RAW
 define amdgpu_kernel void @bitcast_gep(i32 addrspace(1)* %out, i32 %w_index, i32 %r_index) {
 entry:
-  %tmp = alloca [4 x i32]
-  %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
-  %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
-  %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
-  %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
-  store i32 0, i32* %x
-  store i32 0, i32* %y
-  store i32 0, i32* %z
-  store i32 0, i32* %w
-  %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
-  %tmp2 = bitcast i32* %tmp1 to [4 x i32]*
-  %tmp3 = getelementptr [4 x i32], [4 x i32]* %tmp2, i32 0, i32 0
-  %tmp4 = load i32, i32* %tmp3
+  %tmp = alloca [4 x i32], addrspace(5)
+  %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
+  %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
+  %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
+  %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
+  store i32 0, i32 addrspace(5)* %x
+  store i32 0, i32 addrspace(5)* %y
+  store i32 0, i32 addrspace(5)* %z
+  store i32 0, i32 addrspace(5)* %w
+  %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
+  %tmp2 = bitcast i32 addrspace(5)* %tmp1 to [4 x i32] addrspace(5)*
+  %tmp3 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp2, i32 0, i32 0
+  %tmp4 = load i32, i32 addrspace(5)* %tmp3
  store i32 %tmp4, i32 addrspace(1)* %out
  ret void
 }
@ -95,18 +96,18 @@ entry:
 ; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
 define amdgpu_kernel void @vector_read_bitcast_gep(i32 addrspace(1)* %out, i32 %index) {
 entry:
-  %tmp = alloca [4 x i32]
-  %x = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
-  %y = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
-  %z = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
-  %w = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
-  %bc = bitcast i32* %x to float*
-  store float 1.0, float* %bc
-  store i32 1, i32* %y
-  store i32 2, i32* %z
-  store i32 3, i32* %w
-  %tmp1 = getelementptr inbounds [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
-  %tmp2 = load i32, i32* %tmp1
+  %tmp = alloca [4 x i32], addrspace(5)
+  %x = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
+  %y = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
+  %z = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
+  %w = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
+  %bc = bitcast i32 addrspace(5)* %x to float addrspace(5)*
+  store float 1.0, float addrspace(5)* %bc
+  store i32 1, i32 addrspace(5)* %y
+  store i32 2, i32 addrspace(5)* %z
+  store i32 3, i32 addrspace(5)* %w
+  %tmp1 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
+  %tmp2 = load i32, i32 addrspace(5)* %tmp1
  store i32 %tmp2, i32 addrspace(1)* %out
  ret void
 }
@ -123,18 +124,18 @@ entry:
 ; OPT: load float
 define amdgpu_kernel void @vector_read_bitcast_alloca(float addrspace(1)* %out, i32 %index) {
 entry:
-  %tmp = alloca [4 x i32]
-  %tmp.bc = bitcast [4 x i32]* %tmp to [4 x float]*
-  %x = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 0
-  %y = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 1
-  %z = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 2
-  %w = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 3
-  store float 0.0, float* %x
-  store float 1.0, float* %y
-  store float 2.0, float* %z
-  store float 4.0, float* %w
-  %tmp1 = getelementptr inbounds [4 x float], [4 x float]* %tmp.bc, i32 0, i32 %index
-  %tmp2 = load float, float* %tmp1
+  %tmp = alloca [4 x i32], addrspace(5)
+  %tmp.bc = bitcast [4 x i32] addrspace(5)* %tmp to [4 x float] addrspace(5)*
+  %x = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 0
+  %y = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 1
+  %z = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 2
+  %w = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 3
+  store float 0.0, float addrspace(5)* %x
+  store float 1.0, float addrspace(5)* %y
+  store float 2.0, float addrspace(5)* %z
+  store float 4.0, float addrspace(5)* %w
+  %tmp1 = getelementptr inbounds [4 x float], [4 x float] addrspace(5)* %tmp.bc, i32 0, i32 %index
+  %tmp2 = load float, float addrspace(5)* %tmp1
  store float %tmp2, float addrspace(1)* %out
  ret void
 }
@ -146,17 +147,17 @@ entry:
 ; OPT: store i32 %0, i32 addrspace(1)* %out, align 4
 define amdgpu_kernel void @vector_read_with_local_arg(i32 addrspace(3)* %stopper, i32 addrspace(1)* %out, i32 %index) {
 entry:
-  %tmp = alloca [4 x i32]
-  %x = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
-  %y = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 1
-  %z = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 2
-  %w = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 3
-  store i32 0, i32* %x
-  store i32 1, i32* %y
-  store i32 2, i32* %z
-  store i32 3, i32* %w
-  %tmp1 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 %index
-  %tmp2 = load i32, i32* %tmp1
+  %tmp = alloca [4 x i32], addrspace(5)
+  %x = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 0
+  %y = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 1
+  %z = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 2
+  %w = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 3
+  store i32 0, i32 addrspace(5)* %x
+  store i32 1, i32 addrspace(5)* %y
+  store i32 2, i32 addrspace(5)* %z
+  store i32 3, i32 addrspace(5)* %w
+  %tmp1 = getelementptr [4 x i32], [4 x i32] addrspace(5)* %tmp, i32 0, i32 %index
+  %tmp2 = load i32, i32 addrspace(5)* %tmp1
  store i32 %tmp2, i32 addrspace(1)* %out
  ret void
 }