Summary of this patch:
  * AMDGPUDAGToDAGISel::isGlobalLoad() now also returns true for loads from
    AMDGPUAS::CONSTANT_ADDRESS when the subtarget generation is below
    SOUTHERN_ISLANDS or the loaded memory type is narrower than i32.
  * The dedicated CONSTANT_LOAD_eg / CONSTANT_LOAD_cm patterns are removed
    from R600Instructions.td.
  * test/CodeGen/R600/load.ll gains section headers plus sign- and
    zero-extending i8 / i16 constant-address-space load tests; the existing
    i32 / f32 constant-address-space tests move into the new section.

diff --git a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index 696910911f9e..307b80483aab 100644
--- a/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -620,6 +620,13 @@ bool AMDGPUDAGToDAGISel::isConstantLoad(const LoadSDNode *N, int CbId) const {
 }
 
 bool AMDGPUDAGToDAGISel::isGlobalLoad(const LoadSDNode *N) const {
+  if (N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) {
+    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
+    if (ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS ||
+        N->getMemoryVT().bitsLT(MVT::i32)) {
+      return true;
+    }
+  }
   return checkType(N->getSrcValue(), AMDGPUAS::GLOBAL_ADDRESS);
 }
 
diff --git a/llvm/lib/Target/R600/R600Instructions.td b/llvm/lib/Target/R600/R600Instructions.td
index 3511dcd1ead3..9ff389701e63 100644
--- a/llvm/lib/Target/R600/R600Instructions.td
+++ b/llvm/lib/Target/R600/R600Instructions.td
@@ -1418,16 +1418,6 @@ def VTX_READ_GLOBAL_128_eg : VTX_READ_128_eg <1,
   [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
 >;
 
-//===----------------------------------------------------------------------===//
-// Constant Loads
-// XXX: We are currently storing all constants in the global address space.
-//===----------------------------------------------------------------------===//
-
-def CONSTANT_LOAD_eg : VTX_READ_32_eg <1,
-  [(set i32:$dst_gpr, (constant_load ADDRVTX_READ:$src_gpr))]
->;
-
-
 } // End Predicates = [isEG]
 
 //===----------------------------------------------------------------------===//
@@ -1883,15 +1873,6 @@ def VTX_READ_GLOBAL_128_cm : VTX_READ_128_cm <1,
   [(set v4i32:$dst_gpr, (global_load ADDRVTX_READ:$src_gpr))]
 >;
 
-//===----------------------------------------------------------------------===//
-// Constant Loads
-// XXX: We are currently storing all constants in the global address space.
-//===----------------------------------------------------------------------===//
-
-def CONSTANT_LOAD_cm : VTX_READ_32_cm <1,
-  [(set i32:$dst_gpr, (constant_load ADDRVTX_READ:$src_gpr))]
->;
-
 } // End isCayman
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/R600/load.ll b/llvm/test/CodeGen/R600/load.ll
index f36f20c63d55..6eef7c7966e5 100644
--- a/llvm/test/CodeGen/R600/load.ll
+++ b/llvm/test/CodeGen/R600/load.ll
@@ -2,6 +2,10 @@
 ; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK %s
 ; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck --check-prefix=SI-CHECK %s
 
+;===------------------------------------------------------------------------===;
+; GLOBAL ADDRESS SPACE
+;===------------------------------------------------------------------------===;
+
 ; Load an i8 value from the global address space.
 ; R600-CHECK: @load_i8
 ; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
@@ -100,31 +104,6 @@ entry:
   ret void
 }
 
-; Load an i32 value from the constant address space.
-; R600-CHECK: @load_const_addrspace_i32
-; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI-CHECK: @load_const_addrspace_i32
-; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]+}}
-define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
-entry:
-  %0 = load i32 addrspace(2)* %in
-  store i32 %0, i32 addrspace(1)* %out
-  ret void
-}
-
-; Load a f32 value from the constant address space.
-; R600-CHECK: @load_const_addrspace_f32
-; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
-
-; SI-CHECK: @load_const_addrspace_f32
-; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]+}}
-define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
-  %1 = load float addrspace(2)* %in
-  store float %1, float addrspace(1)* %out
-  ret void
-}
-
 ; R600-CHECK: @load_i64
 ; R600-CHECK: RAT
 ; R600-CHECK: RAT
@@ -166,3 +145,121 @@ entry:
   store i64 %1, i64 addrspace(1)* %out
   ret void
 }
+
+;===------------------------------------------------------------------------===;
+; CONSTANT ADDRESS SPACE
+;===------------------------------------------------------------------------===;
+
+; Load a sign-extended i8 value
+; R600-CHECK: @load_const_i8_sext
+; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
+; R600-CHECK: 24
+; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
+; R600-CHECK: 24
+; SI-CHECK: @load_const_i8_sext
+; SI-CHECK: BUFFER_LOAD_SBYTE VGPR{{[0-9]+}},
+define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
+entry:
+  %0 = load i8 addrspace(2)* %in
+  %1 = sext i8 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load an aligned i8 value
+; R600-CHECK: @load_const_i8_aligned
+; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+; SI-CHECK: @load_const_i8_aligned
+; SI-CHECK: BUFFER_LOAD_UBYTE VGPR{{[0-9]+}},
+define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
+entry:
+  %0 = load i8 addrspace(2)* %in
+  %1 = zext i8 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load an un-aligned i8 value
+; R600-CHECK: @load_const_i8_unaligned
+; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
+; SI-CHECK: @load_const_i8_unaligned
+; SI-CHECK: BUFFER_LOAD_UBYTE VGPR{{[0-9]+}},
+define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
+entry:
+  %0 = getelementptr i8 addrspace(2)* %in, i32 1
+  %1 = load i8 addrspace(2)* %0
+  %2 = zext i8 %1 to i32
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load a sign-extended i16 value
+; R600-CHECK: @load_const_i16_sext
+; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
+; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
+; R600-CHECK: 16
+; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
+; R600-CHECK: 16
+; SI-CHECK: @load_const_i16_sext
+; SI-CHECK: BUFFER_LOAD_SSHORT
+define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
+entry:
+  %0 = load i16 addrspace(2)* %in
+  %1 = sext i16 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load an aligned i16 value
+; R600-CHECK: @load_const_i16_aligned
+; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+; SI-CHECK: @load_const_i16_aligned
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
+entry:
+  %0 = load i16 addrspace(2)* %in
+  %1 = zext i16 %0 to i32
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load an un-aligned i16 value
+; R600-CHECK: @load_const_i16_unaligned
+; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
+; SI-CHECK: @load_const_i16_unaligned
+; SI-CHECK: BUFFER_LOAD_USHORT
+define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
+entry:
+  %0 = getelementptr i16 addrspace(2)* %in, i32 1
+  %1 = load i16 addrspace(2)* %0
+  %2 = zext i16 %1 to i32
+  store i32 %2, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load an i32 value from the constant address space.
+; R600-CHECK: @load_const_addrspace_i32
+; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+
+; SI-CHECK: @load_const_addrspace_i32
+; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]+}}
+define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
+entry:
+  %0 = load i32 addrspace(2)* %in
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; Load a f32 value from the constant address space.
+; R600-CHECK: @load_const_addrspace_f32
+; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
+
+; SI-CHECK: @load_const_addrspace_f32
+; SI-CHECK: S_LOAD_DWORD SGPR{{[0-9]+}}
+define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
+  %1 = load float addrspace(2)* %in
+  store float %1, float addrspace(1)* %out
+  ret void
+}
+