2017-11-13 13:11:54 +08:00
|
|
|
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
|
|
|
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
|
2014-08-16 01:49:05 +08:00
|
|
|
|
|
|
|
; Test that doing a shift of a pointer with a constant add will be
|
|
|
|
; folded into the constant offset addressing mode even if the add has
|
|
|
|
; multiple uses. This is relevant to accessing 2 separate, adjacent
|
|
|
|
; LDS globals.
|
|
|
|
|
|
|
|
|
2016-02-11 14:02:01 +08:00
|
|
|
declare i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
|
2014-11-14 03:56:13 +08:00
|
|
|
@lds0 = addrspace(3) global [512 x float] undef, align 4
|
|
|
|
@lds1 = addrspace(3) global [512 x float] undef, align 4
|
2014-08-16 01:49:05 +08:00
|
|
|
|
|
|
|
|
|
|
|
; Make sure the (add tid, 2) << 2 gets folded into the ds's offset as (tid << 2) + 8
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}load_shl_base_lds_0:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN: ds_read_b32 {{v[0-9]+}}, [[PTR]] offset:8
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @load_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
|
2015-02-28 05:17:42 +08:00
|
|
|
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
2014-08-16 01:49:05 +08:00
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
store float %val0, float addrspace(1)* %out
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; Make sure once the first use is folded into the addressing mode, the
|
|
|
|
; remaining add use goes through the normal shl + add constant fold.
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}load_shl_base_lds_1:
|
AMDGPU: Write LDS objects out as global symbols in code generation
Summary:
The symbols use the processor-specific SHN_AMDGPU_LDS section index
introduced with a previous change. The linker is then expected to resolve
relocations, which are also emitted.
Initially disabled for HSA and PAL environments until they have caught up
in terms of linker and runtime loader.
Some notes:
- The llvm.amdgcn.groupstaticsize intrinsics can no longer be lowered
to a constant at compile times, which means some tests can no longer
be applied.
The current "solution" is a terrible hack, but the intrinsic isn't
used by Mesa, so we can keep it for now.
- We no longer know the full LDS size per kernel at compile time, which
means that we can no longer generate a relevant error message at
compile time. It would be possible to add a check for the size of
individual variables, but ultimately the linker will have to perform
the final check.
Change-Id: If66dbf33fccfbf3609aefefa2558ac0850d42275
Reviewers: arsenm, rampitec, t-tye, b-sumner, jsjodin
Subscribers: qcolombet, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61494
llvm-svn: 364297
2019-06-25 19:52:30 +08:00
|
|
|
; GCN: v_lshlrev_b32_e32 [[OFS:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
|
|
|
|
; TODO: integrate into the ds_read_b32 offset using a 16-bit relocation
|
|
|
|
; GCN: v_add_{{[iu]}}32_e32 [[PTR:v[0-9]+]], vcc, lds0@abs32@lo, [[OFS]]
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN: ds_read_b32 [[RESULT:v[0-9]+]], [[PTR]] offset:8
|
2017-11-21 02:24:21 +08:00
|
|
|
; GCN: v_add_{{[iu]}}32_e32 [[ADDUSE:v[0-9]+]], vcc, 8, v{{[0-9]+}}
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-DAG: buffer_store_dword [[RESULT]]
|
|
|
|
; GCN-DAG: buffer_store_dword [[ADDUSE]]
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @load_shl_base_lds_1(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
|
2015-02-28 05:17:42 +08:00
|
|
|
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
2014-08-16 01:49:05 +08:00
|
|
|
%shl_add_use = shl i32 %idx.0, 2
|
|
|
|
store i32 %shl_add_use, i32 addrspace(1)* %add_use, align 4
|
|
|
|
store float %val0, float addrspace(1)* %out
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2014-11-14 03:56:13 +08:00
|
|
|
@maxlds = addrspace(3) global [65536 x i8] undef, align 4
|
2014-08-16 01:49:05 +08:00
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}load_shl_base_lds_max_offset
|
|
|
|
; GCN: ds_read_u8 v{{[0-9]+}}, v{{[0-9]+}} offset:65535
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @load_shl_base_lds_max_offset(i8 addrspace(1)* %out, i8 addrspace(3)* %lds, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 65535
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [65536 x i8], [65536 x i8] addrspace(3)* @maxlds, i32 0, i32 %idx.0
|
2015-02-28 05:17:42 +08:00
|
|
|
%val0 = load i8, i8 addrspace(3)* %arrayidx0
|
2014-08-16 01:49:05 +08:00
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use
|
|
|
|
store i8 %val0, i8 addrspace(1)* %out
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; The two globals are placed adjacent in memory, so the same base
|
|
|
|
; pointer can be used with an offset into the second one.
|
|
|
|
|
AMDGPU: Write LDS objects out as global symbols in code generation
Summary:
The symbols use the processor-specific SHN_AMDGPU_LDS section index
introduced with a previous change. The linker is then expected to resolve
relocations, which are also emitted.
Initially disabled for HSA and PAL environments until they have caught up
in terms of linker and runtime loader.
Some notes:
- The llvm.amdgcn.groupstaticsize intrinsics can no longer be lowered
to a constant at compile times, which means some tests can no longer
be applied.
The current "solution" is a terrible hack, but the intrinsic isn't
used by Mesa, so we can keep it for now.
- We no longer know the full LDS size per kernel at compile time, which
means that we can no longer generate a relevant error message at
compile time. It would be possible to add a check for the size of
individual variables, but ultimately the linker will have to perform
the final check.
Change-Id: If66dbf33fccfbf3609aefefa2558ac0850d42275
Reviewers: arsenm, rampitec, t-tye, b-sumner, jsjodin
Subscribers: qcolombet, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61494
llvm-svn: 364297
2019-06-25 19:52:30 +08:00
|
|
|
; TODO: Recover the optimization of using ds_read2st64_b32 using alignment hints
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}load_shl_base_lds_2:
|
AMDGPU: Write LDS objects out as global symbols in code generation
Summary:
The symbols use the processor-specific SHN_AMDGPU_LDS section index
introduced with a previous change. The linker is then expected to resolve
relocations, which are also emitted.
Initially disabled for HSA and PAL environments until they have caught up
in terms of linker and runtime loader.
Some notes:
- The llvm.amdgcn.groupstaticsize intrinsics can no longer be lowered
to a constant at compile times, which means some tests can no longer
be applied.
The current "solution" is a terrible hack, but the intrinsic isn't
used by Mesa, so we can keep it for now.
- We no longer know the full LDS size per kernel at compile time, which
means that we can no longer generate a relevant error message at
compile time. It would be possible to add a check for the size of
individual variables, but ultimately the linker will have to perform
the final check.
Change-Id: If66dbf33fccfbf3609aefefa2558ac0850d42275
Reviewers: arsenm, rampitec, t-tye, b-sumner, jsjodin
Subscribers: qcolombet, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61494
llvm-svn: 364297
2019-06-25 19:52:30 +08:00
|
|
|
; GCN: v_lshlrev_b32_e32 [[OFS:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN-DAG: v_add_{{[iu]}}32_e32 [[PTR0:v[0-9]+]], vcc, lds0@abs32@lo, [[OFS]]
|
|
|
|
; GCN-DAG: v_add_{{[iu]}}32_e32 [[PTR1:v[0-9]+]], vcc, lds1@abs32@lo, [[OFS]]
|
[AMDGPU] Fix typo in SIInstrInfo::memOpsHaveSameBasePtr
Summary:
The typo has been present since memOpsHaveSameBasePtr was introduced in
r313208.
It caused SIInstrInfo::shouldClusterMemOps to cluster more mem ops than
it was supposed to.
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71616
2019-12-18 00:09:02 +08:00
|
|
|
; GCN-DAG: s_mov_b32 m0, -1
|
AMDGPU: Write LDS objects out as global symbols in code generation
Summary:
The symbols use the processor-specific SHN_AMDGPU_LDS section index
introduced with a previous change. The linker is then expected to resolve
relocations, which are also emitted.
Initially disabled for HSA and PAL environments until they have caught up
in terms of linker and runtime loader.
Some notes:
- The llvm.amdgcn.groupstaticsize intrinsics can no longer be lowered
to a constant at compile times, which means some tests can no longer
be applied.
The current "solution" is a terrible hack, but the intrinsic isn't
used by Mesa, so we can keep it for now.
- We no longer know the full LDS size per kernel at compile time, which
means that we can no longer generate a relevant error message at
compile time. It would be possible to add a check for the size of
individual variables, but ultimately the linker will have to perform
the final check.
Change-Id: If66dbf33fccfbf3609aefefa2558ac0850d42275
Reviewers: arsenm, rampitec, t-tye, b-sumner, jsjodin
Subscribers: qcolombet, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D61494
llvm-svn: 364297
2019-06-25 19:52:30 +08:00
|
|
|
|
|
|
|
; GCN-DAG: ds_read_b32 {{v[0-9]+}}, [[PTR0]] offset:256
|
|
|
|
; GCN-DAG: ds_read_b32 {{v[0-9]+}}, [[PTR1]] offset:256
|
|
|
|
; TODO: ds_read2st64_b32 {{v\[[0-9]+:[0-9]+\]}}, [[PTR]] offset0:1 offset1:9
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @load_shl_base_lds_2(float addrspace(1)* %out) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 64
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
|
2015-02-28 05:17:42 +08:00
|
|
|
%val0 = load float, float addrspace(3)* %arrayidx0, align 4
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
|
2015-02-28 05:17:42 +08:00
|
|
|
%val1 = load float, float addrspace(3)* %arrayidx1, align 4
|
2014-08-16 01:49:05 +08:00
|
|
|
%sum = fadd float %val0, %val1
|
|
|
|
store float %sum, float addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}store_shl_base_lds_0:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN: ds_write_b32 [[PTR]], {{v[0-9]+}} offset:8
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @store_shl_base_lds_0(float addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
|
2014-08-16 01:49:05 +08:00
|
|
|
store float 1.0, float addrspace(3)* %arrayidx0, align 4
|
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
; --------------------------------------------------------------------------------
|
|
|
|
; Atomics.
|
|
|
|
|
2014-11-14 03:56:13 +08:00
|
|
|
@lds2 = addrspace(3) global [512 x i32] undef, align 4
|
2014-08-16 01:49:05 +08:00
|
|
|
|
2017-03-22 05:39:51 +08:00
|
|
|
; define amdgpu_kernel void @atomic_load_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
; %idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
2015-02-28 05:17:42 +08:00
|
|
|
; %val = load atomic i32, i32 addrspace(3)* %arrayidx0 seq_cst, align 4
|
2014-08-16 01:49:05 +08:00
|
|
|
; store i32 %val, i32 addrspace(1)* %out, align 4
|
|
|
|
; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
; ret void
|
|
|
|
; }
|
|
|
|
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}atomic_cmpxchg_shl_base_lds_0:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN: ds_cmpst_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}}, {{v[0-9]+}} offset:8
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @atomic_cmpxchg_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use, i32 %swap) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
2014-08-16 01:49:05 +08:00
|
|
|
%pair = cmpxchg i32 addrspace(3)* %arrayidx0, i32 7, i32 %swap seq_cst monotonic
|
|
|
|
%result = extractvalue { i32, i1 } %pair, 0
|
|
|
|
store i32 %result, i32 addrspace(1)* %out, align 4
|
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}atomic_swap_shl_base_lds_0:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN: ds_wrxchg_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @atomic_swap_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
2014-08-16 01:49:05 +08:00
|
|
|
%val = atomicrmw xchg i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
|
|
|
store i32 %val, i32 addrspace(1)* %out, align 4
|
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}atomic_add_shl_base_lds_0:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN: ds_add_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @atomic_add_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
2014-08-16 01:49:05 +08:00
|
|
|
%val = atomicrmw add i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
|
|
|
store i32 %val, i32 addrspace(1)* %out, align 4
|
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}atomic_sub_shl_base_lds_0:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN: ds_sub_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @atomic_sub_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
2014-08-16 01:49:05 +08:00
|
|
|
%val = atomicrmw sub i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
|
|
|
store i32 %val, i32 addrspace(1)* %out, align 4
|
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}atomic_and_shl_base_lds_0:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN: ds_and_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @atomic_and_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
2014-08-16 01:49:05 +08:00
|
|
|
%val = atomicrmw and i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
|
|
|
store i32 %val, i32 addrspace(1)* %out, align 4
|
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}atomic_or_shl_base_lds_0:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN: ds_or_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @atomic_or_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
2014-08-16 01:49:05 +08:00
|
|
|
%val = atomicrmw or i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
|
|
|
store i32 %val, i32 addrspace(1)* %out, align 4
|
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}atomic_xor_shl_base_lds_0:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN: ds_xor_rtn_b32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @atomic_xor_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
2014-08-16 01:49:05 +08:00
|
|
|
%val = atomicrmw xor i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
|
|
|
store i32 %val, i32 addrspace(1)* %out, align 4
|
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-03-22 05:39:51 +08:00
|
|
|
; define amdgpu_kernel void @atomic_nand_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
; %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
; %idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
; %arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
2014-08-16 01:49:05 +08:00
|
|
|
; %val = atomicrmw nand i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
|
|
|
; store i32 %val, i32 addrspace(1)* %out, align 4
|
|
|
|
; store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
; ret void
|
|
|
|
; }
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}atomic_min_shl_base_lds_0:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN: ds_min_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @atomic_min_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
2014-08-16 01:49:05 +08:00
|
|
|
%val = atomicrmw min i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
|
|
|
store i32 %val, i32 addrspace(1)* %out, align 4
|
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}atomic_max_shl_base_lds_0:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN: ds_max_rtn_i32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @atomic_max_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
2014-08-16 01:49:05 +08:00
|
|
|
%val = atomicrmw max i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
|
|
|
store i32 %val, i32 addrspace(1)* %out, align 4
|
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}atomic_umin_shl_base_lds_0:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN: ds_min_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @atomic_umin_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
2014-08-16 01:49:05 +08:00
|
|
|
%val = atomicrmw umin i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
|
|
|
store i32 %val, i32 addrspace(1)* %out, align 4
|
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}atomic_umax_shl_base_lds_0:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[PTR:v[0-9]+]], 2, {{v[0-9]+}}
|
|
|
|
; GCN: ds_max_rtn_u32 {{v[0-9]+}}, [[PTR]], {{v[0-9]+}} offset:8
|
|
|
|
; GCN: s_endpgm
|
2017-03-22 05:39:51 +08:00
|
|
|
define amdgpu_kernel void @atomic_umax_shl_base_lds_0(i32 addrspace(1)* %out, i32 addrspace(1)* %add_use) #0 {
|
2016-02-11 14:02:01 +08:00
|
|
|
%tid.x = tail call i32 @llvm.amdgcn.workitem.id.x() #1
|
2014-08-16 01:49:05 +08:00
|
|
|
%idx.0 = add nsw i32 %tid.x, 2
|
[opaque pointer type] Add textual IR support for explicit type parameter to getelementptr instruction
One of several parallel first steps to remove the target type of pointers,
replacing them with a single opaque pointer type.
This adds an explicit type parameter to the gep instruction so that when the
first parameter becomes an opaque pointer type, the type to gep through is
still available to the instructions.
* This doesn't modify gep operators, only instructions (operators will be
handled separately)
* Textual IR changes only. Bitcode (including upgrade) and changing the
in-memory representation will be in separate changes.
* geps of vectors are transformed as:
getelementptr <4 x float*> %x, ...
->getelementptr float, <4 x float*> %x, ...
Then, once the opaque pointer type is introduced, this will ultimately look
like:
getelementptr float, <4 x ptr> %x
with the unambiguous interpretation that it is a vector of pointers to float.
* address spaces remain on the pointer, not the type:
getelementptr float addrspace(1)* %x
->getelementptr float, float addrspace(1)* %x
Then, eventually:
getelementptr float, ptr addrspace(1) %x
Importantly, the massive amount of test case churn has been automated by
same crappy python code. I had to manually update a few test cases that
wouldn't fit the script's model (r228970,r229196,r229197,r229198). The
python script just massages stdin and writes the result to stdout, I
then wrapped that in a shell script to handle replacing files, then
using the usual find+xargs to migrate all the files.
update.py:
import fileinput
import sys
import re
ibrep = re.compile(r"(^.*?[^%\w]getelementptr inbounds )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
normrep = re.compile( r"(^.*?[^%\w]getelementptr )(((?:<\d* x )?)(.*?)(| addrspace\(\d\)) *\*(|>)(?:$| *(?:%|@|null|undef|blockaddress|getelementptr|addrspacecast|bitcast|inttoptr|\[\[[a-zA-Z]|\{\{).*$))")
def conv(match, line):
if not match:
return line
line = match.groups()[0]
if len(match.groups()[5]) == 0:
line += match.groups()[2]
line += match.groups()[3]
line += ", "
line += match.groups()[1]
line += "\n"
return line
for line in sys.stdin:
if line.find("getelementptr ") == line.find("getelementptr inbounds"):
if line.find("getelementptr inbounds") != line.find("getelementptr inbounds ("):
line = conv(re.match(ibrep, line), line)
elif line.find("getelementptr ") != line.find("getelementptr ("):
line = conv(re.match(normrep, line), line)
sys.stdout.write(line)
apply.sh:
for name in "$@"
do
python3 `dirname "$0"`/update.py < "$name" > "$name.tmp" && mv "$name.tmp" "$name"
rm -f "$name.tmp"
done
The actual commands:
From llvm/src:
find test/ -name *.ll | xargs ./apply.sh
From llvm/src/tools/clang:
find test/ -name *.mm -o -name *.m -o -name *.cpp -o -name *.c | xargs -I '{}' ../../apply.sh "{}"
From llvm/src/tools/polly:
find test/ -name *.ll | xargs ./apply.sh
After that, check-all (with llvm, clang, clang-tools-extra, lld,
compiler-rt, and polly all checked out).
The extra 'rm' in the apply.sh script is due to a few files in clang's test
suite using interesting unicode stuff that my python script was throwing
exceptions on. None of those files needed to be migrated, so it seemed
sufficient to ignore those cases.
Reviewers: rafael, dexonsmith, grosser
Differential Revision: http://reviews.llvm.org/D7636
llvm-svn: 230786
2015-02-28 03:29:02 +08:00
|
|
|
%arrayidx0 = getelementptr inbounds [512 x i32], [512 x i32] addrspace(3)* @lds2, i32 0, i32 %idx.0
|
2014-08-16 01:49:05 +08:00
|
|
|
%val = atomicrmw umax i32 addrspace(3)* %arrayidx0, i32 3 seq_cst
|
|
|
|
store i32 %val, i32 addrspace(1)* %out, align 4
|
|
|
|
store i32 %idx.0, i32 addrspace(1)* %add_use, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_lds:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
|
|
|
|
; GCN: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:32
|
|
|
|
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
|
|
|
|
; GCN: ds_write_b32 [[SCALE1]], v{{[0-9]+}} offset:64
|
|
|
|
define void @shl_add_ptr_combine_2use_lds(i32 %idx) #0 {
|
|
|
|
%idx.add = add nuw i32 %idx, 4
|
|
|
|
%shl0 = shl i32 %idx.add, 3
|
|
|
|
%shl1 = shl i32 %idx.add, 4
|
|
|
|
%ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
|
|
|
|
%ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
|
|
|
|
store volatile i32 9, i32 addrspace(3)* %ptr0
|
|
|
|
store volatile i32 10, i32 addrspace(3)* %ptr1
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_max_lds_offset:
|
|
|
|
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
|
|
|
|
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
|
|
|
|
; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:65528
|
2017-11-21 02:24:21 +08:00
|
|
|
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 0x1fff0, [[SCALE1]]
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN: ds_write_b32 [[ADD1]], v{{[0-9]+$}}
|
|
|
|
define void @shl_add_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
|
|
|
|
%idx.add = add nuw i32 %idx, 8191
|
|
|
|
%shl0 = shl i32 %idx.add, 3
|
|
|
|
%shl1 = shl i32 %idx.add, 4
|
|
|
|
%ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
|
|
|
|
%ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
|
|
|
|
store volatile i32 9, i32 addrspace(3)* %ptr0
|
|
|
|
store volatile i32 10, i32 addrspace(3)* %ptr1
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_lds_offset:
|
2017-11-21 02:24:21 +08:00
|
|
|
; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x1000, v0
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 4, [[ADD]]
|
|
|
|
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 5, [[ADD]]
|
|
|
|
; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+$}}
|
|
|
|
; GCN: ds_write_b32 [[SCALE1]], v{{[0-9]+$}}
|
|
|
|
define void @shl_add_ptr_combine_2use_both_max_lds_offset(i32 %idx) #0 {
|
|
|
|
%idx.add = add nuw i32 %idx, 4096
|
|
|
|
%shl0 = shl i32 %idx.add, 4
|
|
|
|
%shl1 = shl i32 %idx.add, 5
|
|
|
|
%ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
|
|
|
|
%ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
|
|
|
|
store volatile i32 9, i32 addrspace(3)* %ptr0
|
|
|
|
store volatile i32 10, i32 addrspace(3)* %ptr1
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_private:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 2, v0
|
2020-01-22 06:27:57 +08:00
|
|
|
; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], 0 offen offset:16
|
2017-11-13 13:11:54 +08:00
|
|
|
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 3, v0
|
2020-01-22 06:27:57 +08:00
|
|
|
; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE1]], s[0:3], 0 offen offset:32
|
2017-11-13 13:11:54 +08:00
|
|
|
define void @shl_add_ptr_combine_2use_private(i16 zeroext %idx.arg) #0 {
|
|
|
|
%idx = zext i16 %idx.arg to i32
|
|
|
|
%idx.add = add nuw i32 %idx, 4
|
|
|
|
%shl0 = shl i32 %idx.add, 2
|
|
|
|
%shl1 = shl i32 %idx.add, 3
|
2018-02-03 00:07:16 +08:00
|
|
|
%ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
|
|
|
|
%ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
|
|
|
|
store volatile i32 9, i32 addrspace(5)* %ptr0
|
|
|
|
store volatile i32 10, i32 addrspace(5)* %ptr1
|
2017-11-13 13:11:54 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_max_private_offset:
|
|
|
|
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
|
|
|
|
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
|
2020-01-22 06:27:57 +08:00
|
|
|
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], 0 offen offset:4088
|
2017-11-21 02:24:21 +08:00
|
|
|
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x1ff0, [[SCALE1]]
|
2020-01-22 06:27:57 +08:00
|
|
|
; GCN: buffer_store_dword v{{[0-9]+}}, [[ADD]], s[0:3], 0 offen{{$}}
|
2017-11-13 13:11:54 +08:00
|
|
|
define void @shl_add_ptr_combine_2use_max_private_offset(i16 zeroext %idx.arg) #0 {
|
|
|
|
%idx = zext i16 %idx.arg to i32
|
|
|
|
%idx.add = add nuw i32 %idx, 511
|
|
|
|
%shl0 = shl i32 %idx.add, 3
|
|
|
|
%shl1 = shl i32 %idx.add, 4
|
2018-02-03 00:07:16 +08:00
|
|
|
%ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
|
|
|
|
%ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
|
|
|
|
store volatile i32 9, i32 addrspace(5)* %ptr0
|
|
|
|
store volatile i32 10, i32 addrspace(5)* %ptr1
|
2017-11-13 13:11:54 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
; GCN-LABEL: {{^}}shl_add_ptr_combine_2use_both_max_private_offset:
|
2017-11-21 02:24:21 +08:00
|
|
|
; GCN: v_add_{{[iu]}}32_e32 [[ADD:v[0-9]+]], vcc, 0x100, v0
|
2017-11-13 13:11:54 +08:00
|
|
|
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 4, [[ADD]]
|
|
|
|
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 5, [[ADD]]
|
2020-01-22 06:27:57 +08:00
|
|
|
; GCN-DAG: buffer_store_dword v{{[0-9]+}}, [[SCALE0]], s[0:3], 0 offen{{$}}
|
|
|
|
; GCN: buffer_store_dword v{{[0-9]+}}, [[SCALE1]], s[0:3], 0 offen{{$}}
|
2017-11-13 13:11:54 +08:00
|
|
|
define void @shl_add_ptr_combine_2use_both_max_private_offset(i16 zeroext %idx.arg) #0 {
|
|
|
|
%idx = zext i16 %idx.arg to i32
|
|
|
|
%idx.add = add nuw i32 %idx, 256
|
|
|
|
%shl0 = shl i32 %idx.add, 4
|
|
|
|
%shl1 = shl i32 %idx.add, 5
|
2018-02-03 00:07:16 +08:00
|
|
|
%ptr0 = inttoptr i32 %shl0 to i32 addrspace(5)*
|
|
|
|
%ptr1 = inttoptr i32 %shl1 to i32 addrspace(5)*
|
|
|
|
store volatile i32 9, i32 addrspace(5)* %ptr0
|
|
|
|
store volatile i32 10, i32 addrspace(5)* %ptr1
|
2017-11-13 13:11:54 +08:00
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
[DAGCombine] Prune unnused nodes.
Summary:
Nodes that have no uses are eventually pruned when they are selected
from the worklist. Record nodes newly added to the worklist or DAG and
perform pruning after every combine attempt.
Reviewers: efriedma, RKSimon, craig.topper, spatel, jyknight
Reviewed By: jyknight
Subscribers: jdoerfert, jyknight, nemanjai, jvesely, nhaehnle, javed.absar, hiraditya, jsji, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D58070
llvm-svn: 357283
2019-03-30 01:35:56 +08:00
|
|
|
; FIXME: This or should fold into an offset on the write
|
2017-11-15 07:46:42 +08:00
|
|
|
; GCN-LABEL: {{^}}shl_or_ptr_combine_2use_lds:
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
|
[DAGCombine] Prune unnused nodes.
Summary:
Nodes that have no uses are eventually pruned when they are selected
from the worklist. Record nodes newly added to the worklist or DAG and
perform pruning after every combine attempt.
Reviewers: efriedma, RKSimon, craig.topper, spatel, jyknight
Reviewed By: jyknight
Subscribers: jdoerfert, jyknight, nemanjai, jvesely, nhaehnle, javed.absar, hiraditya, jsji, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D58070
llvm-svn: 357283
2019-03-30 01:35:56 +08:00
|
|
|
; GCN: v_or_b32_e32 [[SCALE1:v[0-9]+]], 32, [[SCALE0]]
|
|
|
|
; GCN: ds_write_b32 [[SCALE1]], v{{[0-9]+}}
|
2017-11-15 07:46:42 +08:00
|
|
|
|
|
|
|
; GCN: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
|
|
|
|
; GCN: ds_write_b32 [[SCALE1]], v{{[0-9]+}} offset:64
|
|
|
|
define void @shl_or_ptr_combine_2use_lds(i32 %idx) #0 {
|
|
|
|
%idx.add = or i32 %idx, 4
|
|
|
|
%shl0 = shl i32 %idx.add, 3
|
|
|
|
%shl1 = shl i32 %idx.add, 4
|
|
|
|
%ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
|
|
|
|
%ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
|
|
|
|
store volatile i32 9, i32 addrspace(3)* %ptr0
|
|
|
|
store volatile i32 10, i32 addrspace(3)* %ptr1
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; GCN-LABEL: {{^}}shl_or_ptr_combine_2use_max_lds_offset:
|
|
|
|
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE0:v[0-9]+]], 3, v0
|
|
|
|
; GCN-DAG: v_lshlrev_b32_e32 [[SCALE1:v[0-9]+]], 4, v0
|
|
|
|
; GCN-DAG: ds_write_b32 [[SCALE0]], v{{[0-9]+}} offset:65528
|
|
|
|
; GCN-DAG: v_or_b32_e32 [[ADD1:v[0-9]+]], 0x1fff0, [[SCALE1]]
|
|
|
|
; GCN: ds_write_b32 [[ADD1]], v{{[0-9]+$}}
|
|
|
|
define void @shl_or_ptr_combine_2use_max_lds_offset(i32 %idx) #0 {
|
|
|
|
%idx.add = or i32 %idx, 8191
|
|
|
|
%shl0 = shl i32 %idx.add, 3
|
|
|
|
%shl1 = shl i32 %idx.add, 4
|
|
|
|
%ptr0 = inttoptr i32 %shl0 to i32 addrspace(3)*
|
|
|
|
%ptr1 = inttoptr i32 %shl1 to i32 addrspace(3)*
|
|
|
|
store volatile i32 9, i32 addrspace(3)* %ptr0
|
|
|
|
store volatile i32 10, i32 addrspace(3)* %ptr1
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
2014-08-16 01:49:05 +08:00
|
|
|
attributes #0 = { nounwind }
|
|
|
|
attributes #1 = { nounwind readnone }
|