llvm-project/llvm/test/CodeGen/R600/fetch-limits.r600.ll

; RUN: llc < %s -march=r600 -mcpu=r600 | FileCheck %s
; RUN: llc < %s -march=r600 -mcpu=rs880 | FileCheck %s
; RUN: llc < %s -march=r600 -mcpu=rv670 | FileCheck %s

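; R600 supports at most 8 fetches in a clause, so the nine llvm.AMDGPU.tex
; calls below are expected to be split across two fetch clauses.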
; CHECK: {{^}}fetch_limits_r600:
; CHECK: Fetch clause
; CHECK: Fetch clause

; Loads nine <4 x float> values from address space 8, feeds each one to
; llvm.AMDGPU.tex, sums the nine results with fadd, and hands the total to
; llvm.R600.store.swizzle.
define void @fetch_limits_r600() #0 {
entry:
  ; Element 0 is read through a null pointer; elements 1-8 are read through
  ; constant GEPs into a [1024 x <4 x float>] array based at null.
  %vec0 = load <4 x float>, <4 x float> addrspace(8)* null
  %vec1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
  %vec2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
  %vec3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)
  %vec4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)
  %vec5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)
  %vec6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)
  %vec7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)
  %vec8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)

  ; Nine tex calls, one per loaded vector, all with the same trailing
  ; i32 arguments (0, 0, 1).
  %tex0 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %vec0, i32 0, i32 0, i32 1)
  %tex1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %vec1, i32 0, i32 0, i32 1)
  %tex2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %vec2, i32 0, i32 0, i32 1)
  %tex3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %vec3, i32 0, i32 0, i32 1)
  %tex4 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %vec4, i32 0, i32 0, i32 1)
  %tex5 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %vec5, i32 0, i32 0, i32 1)
  %tex6 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %vec6, i32 0, i32 0, i32 1)
  %tex7 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %vec7, i32 0, i32 0, i32 1)
  %tex8 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %vec8, i32 0, i32 0, i32 1)

  ; Pairwise reduction; every tex result feeds the final sum, so none of the
  ; calls is dead.
  %sum01 = fadd <4 x float> %tex0, %tex1
  %sum23 = fadd <4 x float> %tex2, %tex3
  %sum45 = fadd <4 x float> %tex4, %tex5
  %sum67 = fadd <4 x float> %tex6, %tex7
  %sum801 = fadd <4 x float> %tex8, %sum01

  %sum2345 = fadd <4 x float> %sum23, %sum45
  %sum67801 = fadd <4 x float> %sum67, %sum801

  %total = fadd <4 x float> %sum2345, %sum67801

  ; Pass the accumulated vector to the store intrinsic.
  call void @llvm.R600.store.swizzle(<4 x float> %total, i32 0, i32 1)
  ret void
}

; Attribute group #0 is the one attached to @fetch_limits_r600.
attributes #0 = { "ShaderType"="0" } ; Pixel Shader

; Intrinsics called by @fetch_limits_r600. The tex intrinsic is declared
; readnone; the store intrinsic carries no such attribute.
declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone
declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
R600: Fix the fetch limits for R600 generation GPUs Reviewed-by: Vincent Lejeune <vljn@ovi.com> https://bugs.freedesktop.org/show_bug.cgi?id=64257 llvm-svn: 183560 2013-06-08 04:28:55 +08:00			`; RUN: llc < %s -march=r600 -mcpu=r600 | FileCheck %s`
			`; RUN: llc < %s -march=r600 -mcpu=rs880 | FileCheck %s`
			`; RUN: llc < %s -march=r600 -mcpu=rv670 | FileCheck %s`

			`; R600 supports 8 fetches in a clause`
R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; CHECK: {{^}}fetch_limits_r600:`
R600: Fix the fetch limits for R600 generation GPUs Reviewed-by: Vincent Lejeune <vljn@ovi.com> https://bugs.freedesktop.org/show_bug.cgi?id=64257 llvm-svn: 183560 2013-06-08 04:28:55 +08:00			`; CHECK: Fetch clause`
			`; CHECK: Fetch clause`

			`define void @fetch_limits_r600() #0 {`
			`entry:`
[opaque pointer type] Add textual IR support for explicit type parameter to load instruction Essentially the same as the GEP change in r230786. A similar migration script can be used to update test cases, though a few more test case improvements/changes were required this time around: (r229269-r229278) import fileinput import sys import re pat = re.compile(r"((?:=\|:\|^)\sload (?:atomic )?(?:volatile )?(.?))(\| addrspace\(\d+\) )\($\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$)") for line in sys.stdin: sys.stdout.write(re.sub(pat, r"\1, \2\3*\4", line)) Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7649 llvm-svn: 230794 2015-02-28 05:17:42 +08:00			`%0 = load <4 x float>, <4 x float> addrspace(8)* null`
[opaque pointer type] Add textual IR support for explicit type parameter to gep operator Similar to gep (r230786) and load (r230794) changes. Similar migration script can be used to update test cases, which successfully migrated all of LLVM and Polly, but about 4 test cases needed manually changes in Clang. (this script will read the contents of stdin and massage it into stdout - wrap it in the 'apply.sh' script shown in previous commits + xargs to apply it over a large set of test cases) import fileinput import sys import re rep = re.compile(r"(getelementptr(?:\s+inbounds)?\s\()((<\d\s+x\s+)?([^@]?)(\|\saddrspace\(\d+\))\s\(?(3)>)\s*)(?=$\|%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|zeroinitializer\|<\|\[\[[a-zA-Z]\|\{\{)", re.MULTILINE \| re.DOTALL) def conv(match): line = match.group(1) line += match.group(4) line += ", " line += match.group(2) return line line = sys.stdin.read() off = 0 for match in re.finditer(rep, line): sys.stdout.write(line[off:match.start()]) sys.stdout.write(conv(match)) off = match.end() sys.stdout.write(line[off:]) llvm-svn: 232184 2015-03-14 02:20:45 +08:00			`%1 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)`
			`%2 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)`
			`%3 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 3)`
			`%4 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 4)`
			`%5 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 5)`
			`%6 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 6)`
			`%7 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 7)`
			`%8 = load <4 x float>, <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>], [1024 x <4 x float>] addrspace(8)* null, i64 0, i32 8)`
R600: Fix the fetch limits for R600 generation GPUs Reviewed-by: Vincent Lejeune <vljn@ovi.com> https://bugs.freedesktop.org/show_bug.cgi?id=64257 llvm-svn: 183560 2013-06-08 04:28:55 +08:00			`%res0 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %0, i32 0, i32 0, i32 1)`
			`%res1 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %1, i32 0, i32 0, i32 1)`
			`%res2 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %2, i32 0, i32 0, i32 1)`
			`%res3 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %3, i32 0, i32 0, i32 1)`
			`%res4 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %4, i32 0, i32 0, i32 1)`
			`%res5 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %5, i32 0, i32 0, i32 1)`
			`%res6 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %6, i32 0, i32 0, i32 1)`
			`%res7 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %7, i32 0, i32 0, i32 1)`
			`%res8 = call <4 x float> @llvm.AMDGPU.tex(<4 x float> %8, i32 0, i32 0, i32 1)`
			`%a = fadd <4 x float> %res0, %res1`
			`%b = fadd <4 x float> %res2, %res3`
			`%c = fadd <4 x float> %res4, %res5`
			`%d = fadd <4 x float> %res6, %res7`
			`%e = fadd <4 x float> %res8, %a`

			`%bc = fadd <4 x float> %b, %c`
			`%de = fadd <4 x float> %d, %e`

			`%bcde = fadd <4 x float> %bc, %de`

			`call void @llvm.R600.store.swizzle(<4 x float> %bcde, i32 0, i32 1)`
			`ret void`
			`}`

			`attributes #0 = { "ShaderType"="0" } ; Pixel Shader`

			`declare <4 x float> @llvm.AMDGPU.tex(<4 x float>, i32, i32, i32) readnone`
			`declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)`