llvm-project/llvm/test/CodeGen/AMDGPU/vop-shrink.ll

; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Test that we correctly commute a sub instruction
; FUNC-LABEL: {{^}}sub_rev:
; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, vcc, s
; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, s

; ModuleID = 'vop-shrink.ll'

; Kernel exercising the commuted-subtract path. Depending on %cond it either
; stores element 1 of %sgpr to %out, or stores
; (workitem id x) - (element 2 of %sgpr) to %out. The check lines for this
; function expect that subtraction to be emitted as v_subrev_i32_e32 with the
; s-register operand first, rather than as v_sub_i32_e32.
define amdgpu_kernel void @sub_rev(i32 addrspace(1)* %out, <4 x i32> %sgpr, i32 %cond) {
entry:
  %vgpr = call i32 @llvm.amdgcn.workitem.id.x() #1
  %is.zero = icmp eq i32 %cond, 0
  br i1 %is.zero, label %if, label %else

if:                                               ; preds = %entry
  ; Plain store of a kernel-argument element; no arithmetic on this path.
  %elt1 = extractelement <4 x i32> %sgpr, i32 1
  store i32 %elt1, i32 addrspace(1)* %out
  br label %endif

else:                                             ; preds = %entry
  ; The operand order matters: the workitem id is the minuend and the
  ; kernel-argument element is the subtrahend, so the instruction has to be
  ; commuted for the argument element to be the first source operand.
  %elt2 = extractelement <4 x i32> %sgpr, i32 2
  %diff = sub i32 %vgpr, %elt2
  store i32 %diff, i32 addrspace(1)* %out
  br label %endif

endif:                                            ; preds = %else, %if
  ret void
}

; Test that we fold an immediate that was illegal for a 64-bit op into the
; 32-bit op when we shrink it.

; FUNC-LABEL: {{^}}add_fold:
; SI: v_add_f32_e32 v{{[0-9]+}}, 0x44800000
define amdgpu_kernel void @add_fold(float addrspace(1)* %out) {
entry:
  ; Convert the x workitem id to float so the add below has a non-constant
  ; operand.
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.f = uitofp i32 %tid to float
  ; 1024.0 has the single-precision bit pattern 0x44800000, which is the
  ; literal the check line for this function expects as a source operand of
  ; v_add_f32_e32.
  %sum = fadd float %tid.f, 1.024000e+03
  store float %sum, float addrspace(1)* %out
  ret void
}

; Function Attrs: nounwind readnone
; Intrinsic called by both kernels in this file to obtain an i32 value
; (by its name, the x workitem id).
declare i32 @llvm.amdgcn.workitem.id.x() #0

; Attribute group #0 is attached to the intrinsic declaration above.
attributes #0 = { nounwind readnone }
; Attribute group #1 is referenced from the intrinsic call site in @sub_rev.
attributes #1 = { readnone }
AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 14:02:01 +08:00			`; RUN: llc -march=amdgcn -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=FUNC %s`
			`; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s \| FileCheck -check-prefix=SI -check-prefix=FUNC %s`
R600/SI: Fix incorrect commute operation in shrink instructions pass We were commuting the instruction by still shrinking it using the original opcode. NOTE: This is a candidate for the 3.5 branch. llvm-svn: 214463 2014-08-01 08:32:28 +08:00
			`; Test that we correctly commute a sub instruction`
R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; FUNC-LABEL: {{^}}sub_rev:`
AMDGPU: Add sdst operand to VOP2b instructions The VOP3 encoding of these allows any SGPR pair for the i1 output, but this was forced before to always use vcc. This doesn't yet try to use this, but does add the operand to the definitions so the main change is adding vcc to the output of the VOP2 encoding. llvm-svn: 246358 2015-08-29 15:16:50 +08:00			`; SI-NOT: v_sub_i32_e32 v{{[0-9]+}}, vcc, s`
			`; SI: v_subrev_i32_e32 v{{[0-9]+}}, vcc, s`
R600/SI: Fix incorrect commute operation in shrink instructions pass We were commuting the instruction by still shrinking it using the original opcode. NOTE: This is a candidate for the 3.5 branch. llvm-svn: 214463 2014-08-01 08:32:28 +08:00
			`; ModuleID = 'vop-shrink.ll'`

AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @sub_rev(i32 addrspace(1)* %out, <4 x i32> %sgpr, i32 %cond) {`
R600/SI: Fix incorrect commute operation in shrink instructions pass We were commuting the instruction by still shrinking it using the original opcode. NOTE: This is a candidate for the 3.5 branch. llvm-svn: 214463 2014-08-01 08:32:28 +08:00			`entry:`
AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 14:02:01 +08:00			`%vgpr = call i32 @llvm.amdgcn.workitem.id.x() #1`
R600/SI: Fix incorrect commute operation in shrink instructions pass We were commuting the instruction by still shrinking it using the original opcode. NOTE: This is a candidate for the 3.5 branch. llvm-svn: 214463 2014-08-01 08:32:28 +08:00			`%tmp = icmp eq i32 %cond, 0`
			`br i1 %tmp, label %if, label %else`

			`if: ; preds = %entry`
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction One of several parallel first steps to remove the target type of pointers, replacing them with a single opaque pointer type. This adds an explicit type parameter to the gep instruction so that when the first parameter becomes an opaque pointer type, the type to gep through is still available to the instructions. * This doesn't modify gep operators, only instructions (operators will be handled separately) * Textual IR changes only. Bitcode (including upgrade) and changing the in-memory representation will be in separate changes. * geps of vectors are transformed as: getelementptr <4 x float> %x, ... ->getelementptr float, <4 x float> %x, ... Then, once the opaque pointer type is introduced, this will ultimately look like: getelementptr float, <4 x ptr> %x with the unambiguous interpretation that it is a vector of pointers to float. * address spaces remain on the pointer, not the type: getelementptr float addrspace(1)* %x ->getelementptr float, float addrspace(1)* %x Then, eventually: getelementptr float, ptr addrspace(1) %x Importantly, the massive amount of test case churn has been automated by same crappy python code. I had to manually update a few test cases that wouldn't fit the script's model (r228970,r229196,r229197,r229198). The python script just massages stdin and writes the result to stdout, I then wrapped that in a shell script to handle replacing files, then using the usual find+xargs to migrate all the files. 
update.py: import fileinput import sys import re ibrep = re.compile(r"(^.?[^%\w]getelementptr inbounds )(((?:<\d x )?)(.?)(\| addrspace\(\d\)) \(\|>)(?:$\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$))") normrep = re.compile( r"(^.?[^%\w]getelementptr )(((?:<\d* x )?)(.?)(\| addrspace\(\d\)) \(\|>)(?:$\| (?:%\|@\|null\|undef\|blockaddress\|getelementptr\|addrspacecast\|bitcast\|inttoptr\|\[\[[a-zA-Z]\|\{\{).$))") def conv(match, line): if not match: return line line = match.groups()[0] if len(match.groups()[5]) == 0: line += match.groups()[2] line += match.groups()[3] line += ", " line += match.groups()[1] line += "\n" return line for line in sys.stdin: if line.find("getelementptr ") == line.find("getelementptr inbounds"): if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("): line = conv(re.match(ibrep, line), line) elif line.find("getelementptr ") != line.find("getelementptr ("): line = conv(re.match(normrep, line), line) sys.stdout.write(line) apply.sh: for name in "$@" do python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name" rm -f "$name.tmp" done The actual commands: From llvm/src: find test/ -name .ll \| xargs ./apply.sh From llvm/src/tools/clang: find test/ -name .mm -o -name .m -o -name .cpp -o -name .c \| xargs -I '{}' ../../apply.sh "{}" From llvm/src/tools/polly: find test/ -name *.ll \| xargs ./apply.sh After that, check-all (with llvm, clang, clang-tools-extra, lld, compiler-rt, and polly all checked out). The extra 'rm' in the apply.sh script is due to a few files in clang's test suite using interesting unicode stuff that my python script was throwing exceptions on. None of those files needed to be migrated, so it seemed sufficient to ignore those cases. 
Reviewers: rafael, dexonsmith, grosser Differential Revision: http://reviews.llvm.org/D7636 llvm-svn: 230786 2015-02-28 03:29:02 +08:00			`%tmp1 = getelementptr i32, i32 addrspace(1)* %out, i32 1`
R600/SI: Fix incorrect commute operation in shrink instructions pass We were commuting the instruction by still shrinking it using the original opcode. NOTE: This is a candidate for the 3.5 branch. llvm-svn: 214463 2014-08-01 08:32:28 +08:00			`%tmp2 = extractelement <4 x i32> %sgpr, i32 1`
			`store i32 %tmp2, i32 addrspace(1)* %out`
			`br label %endif`

			`else: ; preds = %entry`
			`%tmp3 = extractelement <4 x i32> %sgpr, i32 2`
			`%tmp4 = sub i32 %vgpr, %tmp3`
			`store i32 %tmp4, i32 addrspace(1)* %out`
			`br label %endif`

			`endif: ; preds = %else, %if`
			`ret void`
			`}`

R600/SI: Fold immediates when shrinking instructions This will prevent us from using extra MOV instructions once we prefer selecting 64-bit instructions. llvm-svn: 214464 2014-08-01 08:32:33 +08:00			`; Test that we fold an immediate that was illegal for a 64-bit op into the`
			`; 32-bit op when we shrink it.`

R600: Call EmitFunctionHeader() in the AsmPrinter to populate the ELF symbol table llvm-svn: 218776 2014-10-02 01:15:17 +08:00			`; FUNC-LABEL: {{^}}add_fold:`
R600/SI: Change all instruction assembly names to lowercase. This matches the format produced by the AMD proprietary driver. //==================================================================// // Shell script for converting .ll test cases: (Pass the .ll files you want to convert to this script as arguments). //==================================================================// ; This was necessary on my system so that A-Z in sed would match only ; upper case. I'm not sure why. export LC_ALL='C' TEST_FILES="$" MATCHES=`grep -v Patterns SIInstructions.td \| grep -o '"[A-Z0-9_]\+["e]' \| grep -o '[A-Z0-9_]\+' \| sort -r` for f in $TEST_FILES; do # Check that there are SI tests: grep -q -e 'verde' -e 'bonaire' -e 'SI' -e 'tahiti' $f if [ $? -eq 0 ]; then for match in $MATCHES; do sed -i -e "s/\([ :]$match\)/\L\1/" $f done # Try to get check lines with partial instruction names sed -i 's/\(;[ ]SI[A-Z\\-]: \)\([A-Z_0-9]\+\)/\1\L\2/' $f fi done sed -i -e 's/bb0_1/BB0_1/g' ../../../test/CodeGen/R600/infinite-loop.ll sed -i -e 's/SI-NOT: bfe/SI-NOT: {{[^@]}}bfe/g'../../../test/CodeGen/R600/llvm.AMDGPU.bfe.32.ll ../../../test/CodeGen/R600/sext-in-reg.ll sed -i -e 's/exp_IEEE/EXP_IEEE/g' ../../../test/CodeGen/R600/llvm.exp2.ll sed -i -e 's/numVgprs/NumVgprs/g' ../../../test/CodeGen/R600/register-count-comments.ll sed -i 's/\(; CHECK[-NOT]*: \)\([A-Z_0-9]\+\)/\1\L\2/' ../../../test/CodeGen/R600/select64.ll ../../../test/CodeGen/R600/sgpr-copy.ll //==================================================================// // Shell script for converting .td files (run this last) //==================================================================// export LC_ALL='C' sed -i -e '/Patterns/!s/\("[A-Z0-9_]\+[ "e]\)/\L\1/g' SIInstructions.td sed -i -e 's/"EXP/"exp/g' SIInstrInfo.td llvm-svn: 221350 2014-11-05 22:50:53 +08:00			`; SI: v_add_f32_e32 v{{[0-9]+}}, 0x44800000`
AMDGPU: Mark all unspecified CC functions in tests as amdgpu_kernel Currently the default C calling convention functions are treated the same as compute kernels. Make this explicit so the default calling convention can be changed to a non-kernel. Converted with perl -pi -e 's/define void/define amdgpu_kernel void/' on the relevant test directories (and undoing in one place that actually wanted a non-kernel). llvm-svn: 298444 2017-03-22 05:39:51 +08:00			`define amdgpu_kernel void @add_fold(float addrspace(1)* %out) {`
R600/SI: Fold immediates when shrinking instructions This will prevent us from using extra MOV instructions once we prefer selecting 64-bit instructions. llvm-svn: 214464 2014-08-01 08:32:33 +08:00			`entry:`
AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 14:02:01 +08:00			`%tmp = call i32 @llvm.amdgcn.workitem.id.x()`
R600/SI: Fold immediates when shrinking instructions This will prevent us from using extra MOV instructions once we prefer selecting 64-bit instructions. llvm-svn: 214464 2014-08-01 08:32:33 +08:00			`%tmp1 = uitofp i32 %tmp to float`
			`%tmp2 = fadd float %tmp1, 1.024000e+03`
			`store float %tmp2, float addrspace(1)* %out`
			`ret void`
			`}`

R600/SI: Fix incorrect commute operation in shrink instructions pass We were commuting the instruction by still shrinking it using the original opcode. NOTE: This is a candidate for the 3.5 branch. llvm-svn: 214463 2014-08-01 08:32:28 +08:00			`; Function Attrs: nounwind readnone`
AMDGPU: Remove some old intrinsic uses from tests llvm-svn: 260493 2016-02-11 14:02:01 +08:00			`declare i32 @llvm.amdgcn.workitem.id.x() #0`
R600/SI: Fix incorrect commute operation in shrink instructions pass We were commuting the instruction by still shrinking it using the original opcode. NOTE: This is a candidate for the 3.5 branch. llvm-svn: 214463 2014-08-01 08:32:28 +08:00
			`attributes #0 = { nounwind readnone }`
			`attributes #1 = { readnone }`