# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s
--- |
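  ; The IR bodies below exist only to provide the %ir.* values named in the MIR
  ; memory operands; the @llvm.amdgcn.workitem.id.x() calls are what make the
  ; *.not.uniform addresses divergent.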
  define amdgpu_kernel void @load_global_v8i32_non_uniform(<8 x i32> addrspace(1)* %in) {
    %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
    %global.not.uniform.v8i32 = getelementptr <8 x i32>, <8 x i32> addrspace(1)* %in, i32 %tmp0
    %tmp2 = load <8 x i32>, <8 x i32> addrspace(1)* %global.not.uniform.v8i32
    ret void
  }

  define amdgpu_kernel void @load_global_v4i64_non_uniform(<4 x i64> addrspace(1)* %in) {
    %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
    %global.not.uniform.v4i64 = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tmp0
    %tmp2 = load <4 x i64>, <4 x i64> addrspace(1)* %global.not.uniform.v4i64
    ret void
  }

  define amdgpu_kernel void @load_global_v16i32_non_uniform(<16 x i32> addrspace(1)* %in) {
    %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
    %global.not.uniform.v16i32 = getelementptr <16 x i32>, <16 x i32> addrspace(1)* %in, i32 %tmp0
    %tmp2 = load <16 x i32>, <16 x i32> addrspace(1)* %global.not.uniform.v16i32
    ret void
  }

  define amdgpu_kernel void @load_global_v8i64_non_uniform(<8 x i64> addrspace(1)* %in) {
    %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
    %global.not.uniform.v8i64 = getelementptr <8 x i64>, <8 x i64> addrspace(1)* %in, i32 %tmp0
    %tmp2 = load <8 x i64>, <8 x i64> addrspace(1)* %global.not.uniform.v8i64
    ret void
  }

  define amdgpu_kernel void @load_global_v8i32_uniform() {ret void}
  define amdgpu_kernel void @load_global_v4i64_uniform() {ret void}
  define amdgpu_kernel void @load_global_v16i32_uniform() {ret void}
  define amdgpu_kernel void @load_global_v8i64_uniform() {ret void}

  define amdgpu_kernel void @load_constant_v8i32_non_uniform(<8 x i32> addrspace(4)* %in) {
    %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
    %constant.not.uniform.v8i32 = getelementptr <8 x i32>, <8 x i32> addrspace(4)* %in, i32 %tmp0
    %tmp2 = load <8 x i32>, <8 x i32> addrspace(4)* %constant.not.uniform.v8i32
    ret void
  }

  define amdgpu_kernel void @load_constant_v4i64_non_uniform(<4 x i64> addrspace(4)* %in) {
    %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
    %constant.not.uniform.v4i64 = getelementptr <4 x i64>, <4 x i64> addrspace(4)* %in, i32 %tmp0
    %tmp2 = load <4 x i64>, <4 x i64> addrspace(4)* %constant.not.uniform.v4i64
    ret void
  }

  define amdgpu_kernel void @load_constant_v16i32_non_uniform(<16 x i32> addrspace(4)* %in) {
    %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
    %constant.not.uniform.v16i32 = getelementptr <16 x i32>, <16 x i32> addrspace(4)* %in, i32 %tmp0
    %tmp2 = load <16 x i32>, <16 x i32> addrspace(4)* %constant.not.uniform.v16i32
    ret void
  }

  define amdgpu_kernel void @load_constant_v8i64_non_uniform(<8 x i64> addrspace(4)* %in) {
    %tmp0 = call i32 @llvm.amdgcn.workitem.id.x() #0
    %constant.not.uniform.v8i64 = getelementptr <8 x i64>, <8 x i64> addrspace(4)* %in, i32 %tmp0
    %tmp2 = load <8 x i64>, <8 x i64> addrspace(4)* %constant.not.uniform.v8i64
    ret void
  }

  define amdgpu_kernel void @load_constant_v8i32_uniform() {ret void}
  define amdgpu_kernel void @load_constant_v4i64_uniform() {ret void}
  define amdgpu_kernel void @load_constant_v16i32_uniform() {ret void}
  define amdgpu_kernel void @load_constant_v8i64_uniform() {ret void}

  define amdgpu_kernel void @load_local_uniform() { ret void }
  define amdgpu_kernel void @load_region_uniform() { ret void }

  define amdgpu_kernel void @extload_constant_i8_to_i32_uniform() { ret void }
  define amdgpu_kernel void @extload_global_i8_to_i32_uniform() { ret void }
  define amdgpu_kernel void @extload_constant_i16_to_i32_uniform() { ret void }
  define amdgpu_kernel void @extload_global_i16_to_i32_uniform() { ret void }

  define amdgpu_kernel void @load_constant_i32_uniform_align4() {ret void}
  define amdgpu_kernel void @load_constant_i32_uniform_align2() {ret void}
  define amdgpu_kernel void @load_constant_i32_uniform_align1() {ret void}

  define amdgpu_kernel void @load_private_uniform_sgpr_i32() {ret void}

  define amdgpu_kernel void @load_constant_v8i32_vgpr_crash() { ret void }
  define amdgpu_kernel void @load_constant_v8i32_vgpr_crash_loop_phi() { ret void }

  declare i32 @llvm.amdgcn.workitem.id.x() #0
  attributes #0 = { nounwind readnone }
...
---
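# Divergent <8 x s32> global load: the checks below expect regbankselect to
# split it into two <4 x s32> VGPR loads at offsets 0 and 16 and recombine
# them with G_CONCAT_VECTORS.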
name: load_global_v8i32_non_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_global_v8i32_non_uniform
    ; CHECK: [[PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
    ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR]](p1) :: (load 16 from %ir.global.not.uniform.v8i32, align 32, addrspace 1)
    ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
    ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
    ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v8i32 + 16, align 32, addrspace 1)
    ; CHECK: %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>)
    %0:_(p1) = COPY $sgpr0_sgpr1
    %1:_(<8 x s32>) = G_LOAD %0 :: (load 32 from %ir.global.not.uniform.v8i32)
...
---
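# Divergent <4 x s64> global load: split into two <2 x s64> pieces.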
name: load_global_v4i64_non_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_global_v4i64_non_uniform
    ; CHECK: [[PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
    ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR]](p1) :: (load 16 from %ir.global.not.uniform.v4i64, align 32, addrspace 1)
    ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
    ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
    ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v4i64 + 16, align 32, addrspace 1)
    ; CHECK: %1:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>)
    %0:_(p1) = COPY $sgpr0_sgpr1
    %1:_(<4 x s64>) = G_LOAD %0 :: (load 32 from %ir.global.not.uniform.v4i64)
...
---
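# Divergent 512-bit global loads are split into four 128-bit pieces at
# offsets 0, 16, 32, and 48.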
name: load_global_v16i32_non_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_global_v16i32_non_uniform
    ; CHECK: [[PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
    ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR]](p1) :: (load 16 from %ir.global.not.uniform.v16i32, align 64, addrspace 1)
    ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
    ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
    ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 16, align 64, addrspace 1)
    ; CHECK: [[OFFSET32:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32
    ; CHECK: [[GEP32:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET32]](s64)
    ; CHECK: [[LOAD32:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP32]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 32, align 64, addrspace 1)
    ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48
    ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64)
    ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP48]](p1) :: (load 16 from %ir.global.not.uniform.v16i32 + 48, align 64, addrspace 1)
    ; CHECK: %1:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>), [[LOAD32]](<4 x s32>), [[LOAD48]](<4 x s32>)
    %0:_(p1) = COPY $sgpr0_sgpr1
    %1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.global.not.uniform.v16i32)
...
---
name: load_global_v8i64_non_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_global_v8i64_non_uniform
    ; CHECK: [[PTR:%[0-9]+]]:sgpr(p1) = COPY $sgpr0_sgpr1
    ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR]](p1) :: (load 16 from %ir.global.not.uniform.v8i64, align 64, addrspace 1)
    ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
    ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
    ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p1) :: (load 16 from %ir.global.not.uniform.v8i64 + 16, align 64, addrspace 1)
    ; CHECK: [[OFFSET32:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32
    ; CHECK: [[GEP32:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET32]](s64)
    ; CHECK: [[LOAD32:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP32]](p1) :: (load 16 from %ir.global.not.uniform.v8i64 + 32, align 64, addrspace 1)
    ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48
    ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p1) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64)
    ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP48]](p1) :: (load 16 from %ir.global.not.uniform.v8i64 + 48, align 64, addrspace 1)
    ; CHECK: %1:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>), [[LOAD32]](<2 x s64>), [[LOAD48]](<2 x s64>)
    %0:_(p1) = COPY $sgpr0_sgpr1
    %1:_(<8 x s64>) = G_LOAD %0 :: (load 64 from %ir.global.not.uniform.v8i64)
...
---
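# Uniform invariant global loads: the next four tests expect the wide load to
# stay as a single G_LOAD.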
name: load_global_v8i32_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_global_v8i32_uniform
    ; CHECK: (<8 x s32>) = G_LOAD %0(p1) :: (invariant load 32, addrspace 1)
    %0:_(p1) = COPY $sgpr0_sgpr1
    %1:_(<8 x s32>) = G_LOAD %0 :: (invariant load 32, addrspace 1)
...
---
name: load_global_v4i64_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_global_v4i64_uniform
    ; CHECK: (<4 x s64>) = G_LOAD %0(p1) :: (invariant load 32, addrspace 1)
    %0:_(p1) = COPY $sgpr0_sgpr1
    %1:_(<4 x s64>) = G_LOAD %0 :: (invariant load 32, addrspace 1)
...
---
name: load_global_v16i32_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_global_v16i32_uniform
    ; CHECK: (<16 x s32>) = G_LOAD %0(p1) :: (invariant load 64, addrspace 1)
    %0:_(p1) = COPY $sgpr0_sgpr1
    %1:_(<16 x s32>) = G_LOAD %0 :: (invariant load 64, addrspace 1)
...
---
name: load_global_v8i64_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_global_v8i64_uniform
    ; CHECK: (<8 x s64>) = G_LOAD %0(p1) :: (invariant load 64, addrspace 1)
    %0:_(p1) = COPY $sgpr0_sgpr1
    %1:_(<8 x s64>) = G_LOAD %0 :: (invariant load 64, addrspace 1)
...
---
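# Divergent loads from the constant address space (p4) are split the same way
# as the global-address-space cases above.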
name: load_constant_v8i32_non_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_constant_v8i32_non_uniform
    ; CHECK: [[PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
    ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32, align 32, addrspace 4)
    ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
    ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
    ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v8i32 + 16, align 32, addrspace 4)
    ; CHECK: %1:vgpr(<8 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(<8 x s32>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform.v8i32)
...
---
name: load_constant_v4i64_non_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_constant_v4i64_non_uniform
    ; CHECK: [[PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
    ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64, align 32, addrspace 4)
    ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
    ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
    ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v4i64 + 16, align 32, addrspace 4)
    ; CHECK: %1:vgpr(<4 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(<4 x s64>) = G_LOAD %0 :: (load 32 from %ir.constant.not.uniform.v4i64)
...
---
name: load_constant_v16i32_non_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_constant_v16i32_non_uniform
    ; CHECK: [[PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
    ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[PTR]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32, align 64, addrspace 4)
    ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
    ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
    ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 16, align 64, addrspace 4)
    ; CHECK: [[OFFSET32:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32
    ; CHECK: [[GEP32:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET32]](s64)
    ; CHECK: [[LOAD32:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP32]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 32, align 64, addrspace 4)
    ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48
    ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64)
    ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<4 x s32>) = G_LOAD [[GEP48]](p4) :: (load 16 from %ir.constant.not.uniform.v16i32 + 48, align 64, addrspace 4)
    ; CHECK: %1:vgpr(<16 x s32>) = G_CONCAT_VECTORS [[LOAD0]](<4 x s32>), [[LOAD16]](<4 x s32>), [[LOAD32]](<4 x s32>), [[LOAD48]](<4 x s32>)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(<16 x s32>) = G_LOAD %0 :: (load 64 from %ir.constant.not.uniform.v16i32)
...
---
name: load_constant_v8i64_non_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_constant_v8i64_non_uniform
    ; CHECK: [[PTR:%[0-9]+]]:sgpr(p4) = COPY $sgpr0_sgpr1
    ; CHECK: [[LOAD0:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[PTR]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64, align 64, addrspace 4)
    ; CHECK: [[OFFSET16:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 16
    ; CHECK: [[GEP16:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET16]](s64)
    ; CHECK: [[LOAD16:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP16]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 16, align 64, addrspace 4)
    ; CHECK: [[OFFSET32:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 32
    ; CHECK: [[GEP32:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET32]](s64)
    ; CHECK: [[LOAD32:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP32]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 32, align 64, addrspace 4)
    ; CHECK: [[OFFSET48:%[0-9]+]]:vgpr(s64) = G_CONSTANT i64 48
    ; CHECK: [[GEP48:%[0-9]+]]:vgpr(p4) = G_PTR_ADD [[PTR]], [[OFFSET48]](s64)
    ; CHECK: [[LOAD48:%[0-9]+]]:vgpr(<2 x s64>) = G_LOAD [[GEP48]](p4) :: (load 16 from %ir.constant.not.uniform.v8i64 + 48, align 64, addrspace 4)
    ; CHECK: %1:vgpr(<8 x s64>) = G_CONCAT_VECTORS [[LOAD0]](<2 x s64>), [[LOAD16]](<2 x s64>), [[LOAD32]](<2 x s64>), [[LOAD48]](<2 x s64>)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(<8 x s64>) = G_LOAD %0 :: (load 64 from %ir.constant.not.uniform.v8i64)
...
---
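# Uniform constant-address-space loads likewise stay as a single wide G_LOAD.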
name: load_constant_v8i32_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_constant_v8i32_uniform
    ; CHECK: (<8 x s32>) = G_LOAD %0(p4) :: (load 32, addrspace 4)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4)
...
---
name: load_constant_v4i64_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_constant_v4i64_uniform
    ; CHECK: (<4 x s64>) = G_LOAD %0(p4) :: (load 32, addrspace 4)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(<4 x s64>) = G_LOAD %0 :: (load 32, addrspace 4)
...
---
name: load_constant_v16i32_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_constant_v16i32_uniform
    ; CHECK: (<16 x s32>) = G_LOAD %0(p4) :: (load 64, addrspace 4)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(<16 x s32>) = G_LOAD %0 :: (load 64, addrspace 4)
...
---
name: load_constant_v8i64_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_constant_v8i64_uniform
    ; CHECK: (<8 x s64>) = G_LOAD %0(p4) :: (load 64, addrspace 4)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(<8 x s64>) = G_LOAD %0 :: (load 64, addrspace 4)
...
---
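# Uniform LDS (local) load: the SGPR pointer is copied into a VGPR and the
# load itself is assigned to the VGPR bank.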
name: load_local_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0
    ; CHECK-LABEL: load_local_uniform
    ; CHECK: %0:sgpr(p3) = COPY $sgpr0
    ; CHECK: %2:vgpr(p3) = COPY %0(p3)
    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p3) :: (load 4, addrspace 3)
    %0:_(p3) = COPY $sgpr0
    %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 3)
...
---
name: load_region_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0
    ; CHECK-LABEL: load_region_uniform
    ; CHECK: %0:sgpr(p3) = COPY $sgpr0
    ; CHECK: %2:vgpr(p3) = COPY %0(p3)
    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p3) :: (load 4, addrspace 5)
    %0:_(p3) = COPY $sgpr0
    %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5)
...
---
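# The next four tests cover uniform extending loads (i8/i16 to i32) from
# constant and global memory; each one copies the SGPR pointer to a VGPR and
# loads on the VGPR bank.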
name: extload_constant_i8_to_i32_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: extload_constant_i8_to_i32_uniform
    ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
    ; CHECK: %2:vgpr(p4) = COPY %0(p4)
    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 1, addrspace 4)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 4, align 1)
...
---
name: extload_global_i8_to_i32_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: extload_global_i8_to_i32_uniform{{$}}
    ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
    ; CHECK: %2:vgpr(p4) = COPY %0(p4)
    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 1, addrspace 1)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(s32) = G_LOAD %0 :: (load 1, addrspace 1, align 1)
...
---
name: extload_constant_i16_to_i32_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: extload_constant_i16_to_i32_uniform
    ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
    ; CHECK: %2:vgpr(p4) = COPY %0(p4)
    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 2, addrspace 4)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 4, align 2)
...
---
name: extload_global_i16_to_i32_uniform
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: extload_global_i16_to_i32_uniform
    ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
    ; CHECK: %2:vgpr(p4) = COPY %0(p4)
    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 2, addrspace 1)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(s32) = G_LOAD %0 :: (load 2, addrspace 1, align 2)
...
---
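# The next three tests cover uniform i32 constant loads at align 4/2/1: only
# the naturally aligned case stays as an SGPR load; the under-aligned cases go
# through a VGPR pointer copy and a VGPR load.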
name: load_constant_i32_uniform_align4
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_constant_i32_uniform_align4
    ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
    ; CHECK: %1:sgpr(s32) = G_LOAD %0(p4) :: (load 4, addrspace 4)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 4)
...
---
name: load_constant_i32_uniform_align2
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_constant_i32_uniform_align2
    ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
    ; CHECK: %2:vgpr(p4) = COPY %0(p4)
    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 4, align 2, addrspace 4)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 2)
...
---
name: load_constant_i32_uniform_align1
legalized: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1
    ; CHECK-LABEL: name: load_constant_i32_uniform_align1
    ; CHECK: %0:sgpr(p4) = COPY $sgpr0_sgpr1
    ; CHECK: %2:vgpr(p4) = COPY %0(p4)
    ; CHECK: %1:vgpr(s32) = G_LOAD %2(p4) :: (load 4, align 1, addrspace 4)
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 1)
...
---
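# Uniform private (scratch) load: the SGPR pointer is copied to a VGPR and the
# load uses the VGPR bank.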
name: load_private_uniform_sgpr_i32
legalized: true

body: |
  bb.0:
    liveins: $sgpr0
    ; CHECK-LABEL: name: load_private_uniform_sgpr_i32
    ; CHECK: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sgpr0
    ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY [[COPY]](p5)
    ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p5) :: (load 4, addrspace 5)
    %0:_(p5) = COPY $sgpr0
    %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5, align 4)
...
---
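# Wide constant load through a VGPR pointer (the test name suggests this used
# to crash); it is expected to split into VGPR loads combined with
# G_CONCAT_VECTORS.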
name: load_constant_v8i32_vgpr_crash
legalized: true
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $vgpr0_vgpr1
    ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash
    ; CHECK: %0:vgpr(p4) = COPY $vgpr0_vgpr1
    ; CHECK: vgpr(<4 x s32>) = G_LOAD %0(p4)
    ; CHECK: vgpr(<4 x s32>) = G_LOAD
    ; CHECK: G_CONCAT_VECTORS
    %0:_(p4) = COPY $vgpr0_vgpr1
    %1:_(<8 x s32>) = G_LOAD %0 :: (load 32, addrspace 4)
...
---
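# Same pattern, but the VGPR pointer is produced by a G_PHI in a loop.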
name: load_constant_v8i32_vgpr_crash_loop_phi
legalized: true
tracksRegLiveness: true

body: |
  bb.0:
    liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
    ; CHECK-LABEL: name: load_constant_v8i32_vgpr_crash_loop_phi
    ; CHECK: G_PHI
    ; CHECK: vgpr(<4 x s32>) = G_LOAD
    ; CHECK: vgpr(<4 x s32>) = G_LOAD
    ; CHECK: G_CONCAT_VECTORS
    %0:_(p4) = COPY $sgpr0_sgpr1
    %1:_(p4) = COPY $sgpr2_sgpr3
    G_BR %bb.1

  bb.1:
    %2:_(p4) = G_PHI %0, %bb.0, %4, %bb.1
    %3:_(<8 x s32>) = G_LOAD %2 :: (load 32, addrspace 4)
    %4:_(p4) = COPY %1
    G_BR %bb.1
...