[AMDGPU] Fix-up cases where writelane has 2 SGPR operands
Summary:
Even though writelane doesn't have the same constraints as other valu
instructions it still can't violate the >1 SGPR operand constraint
Due to later register propagation (e.g. fixing up vgpr operands via
readfirstlane) changing writelane to only have a single SGPR is tricky.
This implementation puts a new check after SIFixSGPRCopies that prevents
multiple SGPRs being used in any writelane instructions.
The algorithm used is to check for trivial copy prop of suitable constants into
one of the SGPR operands and perform that if possible. If this isn't possible
put an explicit copy of Src1 SGPR into M0 and use that instead (this is
allowable for writelane as the constraint is for SGPR read-port and not
constant-bus access).
Reviewers: rampitec, tpr, arsenm, nhaehnle
Reviewed By: rampitec, arsenm, nhaehnle
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, mgorny, yaxunl, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D51932
Change-Id: Ic7553fa57440f208d4dbc4794fc24345d7e0e9ea
llvm-svn: 375004
2019-10-16 22:37:39 +08:00
|
|
|
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,CI,CIGFX9 %s
|
|
|
|
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx802 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,GFX9,CIGFX9 %s
|
|
|
|
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,GFX10 %s
|
2018-03-01 03:10:32 +08:00
|
|
|
|
|
|
|
declare i32 @llvm.amdgcn.writelane(i32, i32, i32) #0
|
|
|
|
|
|
|
|
; CHECK-LABEL: {{^}}test_writelane_sreg:
|
[AMDGPU] Fix-up cases where writelane has 2 SGPR operands
Summary:
Even though writelane doesn't have the same constraints as other valu
instructions it still can't violate the >1 SGPR operand constraint
Due to later register propagation (e.g. fixing up vgpr operands via
readfirstlane) changing writelane to only have a single SGPR is tricky.
This implementation puts a new check after SIFixSGPRCopies that prevents
multiple SGPRs being used in any writelane instructions.
The algorithm used is to check for trivial copy prop of suitable constants into
one of the SGPR operands and perform that if possible. If this isn't possible
put an explicit copy of Src1 SGPR into M0 and use that instead (this is
allowable for writelane as the constraint is for SGPR read-port and not
constant-bus access).
Reviewers: rampitec, tpr, arsenm, nhaehnle
Reviewed By: rampitec, arsenm, nhaehnle
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, mgorny, yaxunl, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D51932
Change-Id: Ic7553fa57440f208d4dbc4794fc24345d7e0e9ea
llvm-svn: 375004
2019-10-16 22:37:39 +08:00
|
|
|
; CIGFX9: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, m0
|
|
|
|
; GFX10: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}
|
2018-03-01 03:10:32 +08:00
|
|
|
define amdgpu_kernel void @test_writelane_sreg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
|
|
|
|
%oldval = load i32, i32 addrspace(1)* %out
|
|
|
|
%writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 %oldval)
|
|
|
|
store i32 %writelane, i32 addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; CHECK-LABEL: {{^}}test_writelane_imm_sreg:
|
|
|
|
; CHECK: v_writelane_b32 v{{[0-9]+}}, 32, s{{[0-9]+}}
|
|
|
|
define amdgpu_kernel void @test_writelane_imm_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
|
|
|
|
%oldval = load i32, i32 addrspace(1)* %out
|
|
|
|
%writelane = call i32 @llvm.amdgcn.writelane(i32 32, i32 %src1, i32 %oldval)
|
|
|
|
store i32 %writelane, i32 addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; CHECK-LABEL: {{^}}test_writelane_vreg_lane:
|
|
|
|
; CHECK: v_readfirstlane_b32 [[LANE:s[0-9]+]], v{{[0-9]+}}
|
|
|
|
; CHECK: v_writelane_b32 v{{[0-9]+}}, 12, [[LANE]]
|
|
|
|
define amdgpu_kernel void @test_writelane_vreg_lane(i32 addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #1 {
|
|
|
|
%tid = call i32 @llvm.amdgcn.workitem.id.x()
|
|
|
|
%gep.in = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 %tid
|
|
|
|
%args = load <2 x i32>, <2 x i32> addrspace(1)* %gep.in
|
|
|
|
%oldval = load i32, i32 addrspace(1)* %out
|
|
|
|
%lane = extractelement <2 x i32> %args, i32 1
|
|
|
|
%writelane = call i32 @llvm.amdgcn.writelane(i32 12, i32 %lane, i32 %oldval)
|
|
|
|
store i32 %writelane, i32 addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; CHECK-LABEL: {{^}}test_writelane_m0_sreg:
|
|
|
|
; CHECK: s_mov_b32 m0, -1
|
2019-10-19 02:26:37 +08:00
|
|
|
; CIGFX9: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
|
[AMDGPU] Fix-up cases where writelane has 2 SGPR operands
Summary:
Even though writelane doesn't have the same constraints as other valu
instructions it still can't violate the >1 SGPR operand constraint
Due to later register propagation (e.g. fixing up vgpr operands via
readfirstlane) changing writelane to only have a single SGPR is tricky.
This implementation puts a new check after SIFixSGPRCopies that prevents
multiple SGPRs being used in any writelane instructions.
The algorithm used is to check for trivial copy prop of suitable constants into
one of the SGPR operands and perform that if possible. If this isn't possible
put an explicit copy of Src1 SGPR into M0 and use that instead (this is
allowable for writelane as the constraint is for SGPR read-port and not
constant-bus access).
Reviewers: rampitec, tpr, arsenm, nhaehnle
Reviewed By: rampitec, arsenm, nhaehnle
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, mgorny, yaxunl, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D51932
Change-Id: Ic7553fa57440f208d4dbc4794fc24345d7e0e9ea
llvm-svn: 375004
2019-10-16 22:37:39 +08:00
|
|
|
; CIGFX9: v_writelane_b32 v{{[0-9]+}}, [[COPY_M0]], m0
|
2019-10-19 02:26:37 +08:00
|
|
|
; GFX10: v_writelane_b32 v{{[0-9]+}}, m0, s{{[0-9]+}}
|
2018-03-01 03:10:32 +08:00
|
|
|
define amdgpu_kernel void @test_writelane_m0_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
|
|
|
|
%oldval = load i32, i32 addrspace(1)* %out
|
2019-06-15 05:16:06 +08:00
|
|
|
%m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
|
2018-03-01 03:10:32 +08:00
|
|
|
%writelane = call i32 @llvm.amdgcn.writelane(i32 %m0, i32 %src1, i32 %oldval)
|
|
|
|
store i32 %writelane, i32 addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; CHECK-LABEL: {{^}}test_writelane_imm:
|
|
|
|
; CHECK: v_writelane_b32 v{{[0-9]+}}, s{{[0-9]+}}, 32
|
|
|
|
define amdgpu_kernel void @test_writelane_imm(i32 addrspace(1)* %out, i32 %src0) #1 {
|
|
|
|
%oldval = load i32, i32 addrspace(1)* %out
|
|
|
|
%writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 32, i32 %oldval) #0
|
|
|
|
store i32 %writelane, i32 addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; CHECK-LABEL: {{^}}test_writelane_sreg_oldval:
|
|
|
|
; CHECK: v_mov_b32_e32 [[OLDVAL:v[0-9]+]], s{{[0-9]+}}
|
[AMDGPU] Fix-up cases where writelane has 2 SGPR operands
Summary:
Even though writelane doesn't have the same constraints as other valu
instructions it still can't violate the >1 SGPR operand constraint
Due to later register propagation (e.g. fixing up vgpr operands via
readfirstlane) changing writelane to only have a single SGPR is tricky.
This implementation puts a new check after SIFixSGPRCopies that prevents
multiple SGPRs being used in any writelane instructions.
The algorithm used is to check for trivial copy prop of suitable constants into
one of the SGPR operands and perform that if possible. If this isn't possible
put an explicit copy of Src1 SGPR into M0 and use that instead (this is
allowable for writelane as the constraint is for SGPR read-port and not
constant-bus access).
Reviewers: rampitec, tpr, arsenm, nhaehnle
Reviewed By: rampitec, arsenm, nhaehnle
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, mgorny, yaxunl, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D51932
Change-Id: Ic7553fa57440f208d4dbc4794fc24345d7e0e9ea
llvm-svn: 375004
2019-10-16 22:37:39 +08:00
|
|
|
; CIGFX9: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, m0
|
|
|
|
; GFX10: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, s{{[0-9]+}}
|
2018-03-01 03:10:32 +08:00
|
|
|
define amdgpu_kernel void @test_writelane_sreg_oldval(i32 inreg %oldval, i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
|
|
|
|
%writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 %oldval)
|
|
|
|
store i32 %writelane, i32 addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
; CHECK-LABEL: {{^}}test_writelane_imm_oldval:
|
|
|
|
; CHECK: v_mov_b32_e32 [[OLDVAL:v[0-9]+]], 42
|
[AMDGPU] Fix-up cases where writelane has 2 SGPR operands
Summary:
Even though writelane doesn't have the same constraints as other valu
instructions it still can't violate the >1 SGPR operand constraint
Due to later register propagation (e.g. fixing up vgpr operands via
readfirstlane) changing writelane to only have a single SGPR is tricky.
This implementation puts a new check after SIFixSGPRCopies that prevents
multiple SGPRs being used in any writelane instructions.
The algorithm used is to check for trivial copy prop of suitable constants into
one of the SGPR operands and perform that if possible. If this isn't possible
put an explicit copy of Src1 SGPR into M0 and use that instead (this is
allowable for writelane as the constraint is for SGPR read-port and not
constant-bus access).
Reviewers: rampitec, tpr, arsenm, nhaehnle
Reviewed By: rampitec, arsenm, nhaehnle
Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, mgorny, yaxunl, tpr, t-tye, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D51932
Change-Id: Ic7553fa57440f208d4dbc4794fc24345d7e0e9ea
llvm-svn: 375004
2019-10-16 22:37:39 +08:00
|
|
|
; CIGFX9: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, m0
|
|
|
|
; GFX10: v_writelane_b32 [[OLDVAL]], s{{[0-9]+}}, s{{[0-9]+}}
|
2018-03-01 03:10:32 +08:00
|
|
|
define amdgpu_kernel void @test_writelane_imm_oldval(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
|
|
|
|
%writelane = call i32 @llvm.amdgcn.writelane(i32 %src0, i32 %src1, i32 42)
|
|
|
|
store i32 %writelane, i32 addrspace(1)* %out, align 4
|
|
|
|
ret void
|
|
|
|
}
|
|
|
|
|
|
|
|
declare i32 @llvm.amdgcn.workitem.id.x() #2
|
|
|
|
|
|
|
|
attributes #0 = { nounwind readnone convergent }
|
|
|
|
attributes #1 = { nounwind }
|
|
|
|
attributes #2 = { nounwind readnone }
|