forked from OSchip/llvm-project
AMDGPU: Fold readlane from copy of SGPR or imm
These may be inserted to assert uniformity somewhere. llvm-svn: 363670
This commit is contained in:
parent
e75e197ad8
commit
bcb5ea0042
|
@ -506,6 +506,41 @@ void SIFoldOperands::foldOperand(
|
|||
return;
|
||||
}
|
||||
|
||||
unsigned UseOpc = UseMI->getOpcode();
|
||||
if (UseOpc == AMDGPU::V_READFIRSTLANE_B32 ||
|
||||
(UseOpc == AMDGPU::V_READLANE_B32 &&
|
||||
(int)UseOpIdx ==
|
||||
AMDGPU::getNamedOperandIdx(UseOpc, AMDGPU::OpName::src0))) {
|
||||
// %vgpr = V_MOV_B32 imm
|
||||
// %sgpr = V_READFIRSTLANE_B32 %vgpr
|
||||
// =>
|
||||
// %sgpr = S_MOV_B32 imm
|
||||
if (FoldingImm) {
|
||||
if (!isEXECMaskConstantBetweenDefAndUses(
|
||||
UseMI->getOperand(UseOpIdx).getReg(), *MRI))
|
||||
return;
|
||||
|
||||
UseMI->setDesc(TII->get(AMDGPU::S_MOV_B32));
|
||||
UseMI->getOperand(1).ChangeToImmediate(OpToFold.getImm());
|
||||
UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
|
||||
return;
|
||||
}
|
||||
|
||||
if (OpToFold.isReg() && TRI->isSGPRReg(*MRI, OpToFold.getReg())) {
|
||||
if (!isEXECMaskConstantBetweenDefAndUses(
|
||||
UseMI->getOperand(UseOpIdx).getReg(), *MRI))
|
||||
return;
|
||||
|
||||
// %vgpr = COPY %sgpr0
|
||||
// %sgpr1 = V_READFIRSTLANE_B32 %vgpr
|
||||
// =>
|
||||
// %sgpr1 = COPY %sgpr0
|
||||
UseMI->setDesc(TII->get(AMDGPU::COPY));
|
||||
UseMI->RemoveOperand(2); // Remove exec read (or src1 for readlane)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
const MCInstrDesc &UseDesc = UseMI->getDesc();
|
||||
|
||||
// Don't fold into target independent nodes. Target independent opcodes
|
||||
|
|
|
@ -1696,6 +1696,13 @@ def : GCNPat<
|
|||
(S_SUB_I32 $src0, NegSubInlineConst32:$src1)
|
||||
>;
|
||||
|
||||
// Avoid pointlessly materializing a constant in VGPR.
|
||||
// FIXME: Should also do this for readlane, but tablegen crashes on
|
||||
// the ignored src1.
|
||||
def : GCNPat<
|
||||
(int_amdgcn_readfirstlane (i32 imm:$src)),
|
||||
(S_MOV_B32 $src)
|
||||
>;
|
||||
|
||||
multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
|
||||
def : GCNPat <
|
||||
|
|
|
@ -0,0 +1,250 @@
|
|||
# RUN: llc -march=amdgcn -run-pass si-fold-operands -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
|
||||
|
||||
# GCN-LABEL: name: fold-imm-readfirstlane{{$}}
|
||||
# GCN: %1:sreg_32_xm0 = S_MOV_B32 123
|
||||
---
|
||||
name: fold-imm-readfirstlane
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec
|
||||
%1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: fold-imm-readfirstlane-readfirstlane{{$}}
|
||||
# GCN: %0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec
|
||||
# GCN: %1:sreg_32_xm0 = S_MOV_B32 123
|
||||
# GCN: %2:vgpr_32 = V_MOV_B32_e32 123, implicit $exec
|
||||
# GCN: %3:sreg_32_xm0 = S_MOV_B32 123
|
||||
|
||||
---
|
||||
name: fold-imm-readfirstlane-readfirstlane
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec
|
||||
%1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec
|
||||
%2:vgpr_32 = COPY %1
|
||||
%3:sreg_32_xm0 = V_READFIRSTLANE_B32 %2, implicit $exec
|
||||
|
||||
...
|
||||
|
||||
|
||||
# GCN-LABEL: name: fold-copy-readfirstlane{{$}}
|
||||
# GCN: %0:sreg_32_xm0 = COPY $sgpr10
|
||||
# GCN: %1:vgpr_32 = COPY %0
|
||||
# GCN: %2:sreg_32_xm0 = COPY %1
|
||||
---
|
||||
name: fold-copy-readfirstlane
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr10
|
||||
%0:sreg_32_xm0 = COPY $sgpr10
|
||||
%1:vgpr_32 = COPY %0
|
||||
%2:sreg_32_xm0 = V_READFIRSTLANE_B32 %1, implicit $exec
|
||||
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: no-fold-copy-readfirstlane-physreg0{{$}}
|
||||
# GCN: %0:vgpr_32 = COPY $sgpr10
|
||||
# GCN-NEXT: %1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec
|
||||
---
|
||||
name: no-fold-copy-readfirstlane-physreg0
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr10
|
||||
%0:vgpr_32 = COPY $sgpr10
|
||||
%1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec
|
||||
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: no-fold-copy-readfirstlane-physreg1{{$}}
|
||||
# GCN: $vgpr0 = COPY $sgpr10
|
||||
# GCN-NEXT: %0:sreg_32_xm0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
|
||||
---
|
||||
name: no-fold-copy-readfirstlane-physreg1
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr10
|
||||
$vgpr0 = COPY $sgpr10
|
||||
%0:sreg_32_xm0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
|
||||
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: no-fold-imm-readfirstlane-physreg{{$}}
|
||||
# GCN: $vgpr0 = V_MOV_B32_e32 123, implicit $exec
|
||||
# GCN-NEXT: V_READFIRSTLANE_B32 $vgpr0, implicit $exec
|
||||
|
||||
---
|
||||
name: no-fold-imm-readfirstlane-physreg
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr0 = V_MOV_B32_e32 123, implicit $exec
|
||||
%0:sreg_32_xm0 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
|
||||
...
|
||||
|
||||
# TODO: This could be folded, if the search for exec modifications was
|
||||
# smarter.
|
||||
|
||||
# GCN-LABEL: name: fold-imm-readfirstlane-cross-block{{$}}
|
||||
# GCN: V_MOV_B32
|
||||
# GCN: V_READFIRSTLANE_B32
|
||||
---
|
||||
name: fold-imm-readfirstlane-cross-block
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
%0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec
|
||||
|
||||
bb.1:
|
||||
%1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec
|
||||
...
|
||||
|
||||
# TODO: This could be folded, if the search for exec modifications was
|
||||
# smarter.
|
||||
|
||||
# GCN-LABEL: name: fold-copy-readfirstlane-cross-block{{$}}
|
||||
# GCN: V_MOV_B32
|
||||
# GCN: V_READFIRSTLANE_B32
|
||||
---
|
||||
name: fold-copy-readfirstlane-cross-block
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr12
|
||||
%0:sreg_32_xm0 = COPY $sgpr12
|
||||
%1:vgpr_32 = V_MOV_B32_e32 %0, implicit $exec
|
||||
|
||||
bb.1:
|
||||
%2:sreg_32_xm0 = V_READFIRSTLANE_B32 %1, implicit $exec
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: fold-copy-readfirstlane-cross-block-exec-def{{$}}
|
||||
# GCN: V_MOV_B32
|
||||
# GCN: $exec = S_MOV_B64_term
|
||||
# GCN: V_READFIRSTLANE_B32
|
||||
---
|
||||
name: fold-copy-readfirstlane-cross-block-exec-def
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr10_sgpr11, $sgpr12
|
||||
%0:sreg_32_xm0 = COPY $sgpr12
|
||||
%1:vgpr_32 = V_MOV_B32_e32 %0, implicit $exec
|
||||
$exec = S_MOV_B64_term $sgpr10_sgpr11
|
||||
|
||||
bb.1:
|
||||
%2:sreg_32_xm0 = V_READFIRSTLANE_B32 %1, implicit $exec
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: fold-copy-readfirstlane-same-block-exec-def{{$}}
|
||||
# GCN: COPY
|
||||
# GCN-NEXT: %1:vgpr_32 = COPY %0
|
||||
# GCN-NEXT: $exec = S_MOV_B64
|
||||
# GCN-NEXT: V_READFIRSTLANE_B32
|
||||
---
|
||||
name: fold-copy-readfirstlane-same-block-exec-def
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr10_sgpr11, $sgpr12
|
||||
%0:sreg_32_xm0 = COPY $sgpr12
|
||||
%1:vgpr_32 = COPY %0, implicit $exec
|
||||
$exec = S_MOV_B64 $sgpr10_sgpr11
|
||||
%2:sreg_32_xm0 = V_READFIRSTLANE_B32 %1, implicit $exec
|
||||
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: fold-imm-readfirstlane-cross-block-exec-def{{$}}
|
||||
# GCN: V_MOV_B32
|
||||
# GCN: $exec = S_MOV_B64
|
||||
# GCN: V_READFIRSTLANE_B32
|
||||
|
||||
---
|
||||
name: fold-imm-readfirstlane-cross-block-exec-def
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr10_sgpr11, $sgpr12_sgpr13
|
||||
%0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec
|
||||
$exec = S_MOV_B64_term $sgpr10_sgpr11
|
||||
|
||||
bb.1:
|
||||
%1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: fold-imm-readfirstlane-same-block-exec-def{{$}}
|
||||
# GCN: V_MOV_B32
|
||||
# GCN-NEXT: $exec = S_MOV_B64
|
||||
# GCN-NEXT: V_READFIRSTLANE_B32
|
||||
---
|
||||
name: fold-imm-readfirstlane-same-block-exec-def
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr10_sgpr11
|
||||
%0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec
|
||||
$exec = S_MOV_B64 $sgpr10_sgpr11
|
||||
%1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec
|
||||
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: fold-sgpr-copy-readfirstlane-same-block-exec-def{{$}}
|
||||
# GCN: COPY
|
||||
# GCN-NEXT: $exec = S_MOV_B64
|
||||
# GCN-NEXT: V_READFIRSTLANE_B32
|
||||
---
|
||||
name: fold-sgpr-copy-readfirstlane-same-block-exec-def
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $sgpr10_sgpr11, $sgpr12
|
||||
%0:vgpr_32 = COPY $sgpr12
|
||||
$exec = S_MOV_B64 $sgpr10_sgpr11
|
||||
%1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: fold-imm-readfirstlane-user{{$}}
|
||||
# GCN: %3:sreg_32_xm0 = S_MOV_B32 123
|
||||
---
|
||||
name: fold-imm-readfirstlane-user
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $sgpr0_sgpr1
|
||||
%0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec
|
||||
%1:sreg_32_xm0 = V_READFIRSTLANE_B32 %0, implicit $exec
|
||||
%2:sreg_32_xm0 = COPY %1
|
||||
%3:sreg_32_xm0 = COPY %2
|
||||
S_ENDPGM 0, implicit %3
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: fold-imm-readlane{{$}}
|
||||
# GCN: %1:sreg_32_xm0 = S_MOV_B32 123
|
||||
---
|
||||
name: fold-imm-readlane
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0, $sgpr0_sgpr1
|
||||
%0:vgpr_32 = V_MOV_B32_e32 123, implicit $exec
|
||||
%1:sreg_32_xm0 = V_READLANE_B32 %0, 0, implicit $exec
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: fold-imm-readlane-src1{{$}}
|
||||
# GCN: %0:vgpr_32 = COPY $vgpr0
|
||||
# GCN: V_READLANE_B32 %0, 12, implicit $exec
|
||||
---
|
||||
name: fold-imm-readlane-src1
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
%1:sreg_32_xm0 = S_MOV_B32 12
|
||||
%2:sreg_32_xm0 = V_READLANE_B32 %0, %1, implicit $exec
|
||||
...
|
|
@ -1,19 +1,30 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope %s
|
||||
|
||||
declare i32 @llvm.amdgcn.readfirstlane(i32) #0
|
||||
|
||||
; CHECK-LABEL: {{^}}test_readfirstlane:
|
||||
; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, v{{[0-9]+}}
|
||||
define amdgpu_kernel void @test_readfirstlane(i32 addrspace(1)* %out, i32 %src) #1 {
|
||||
; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, v2
|
||||
define void @test_readfirstlane(i32 addrspace(1)* %out, i32 %src) #1 {
|
||||
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %src)
|
||||
store i32 %readfirstlane, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_readfirstlane_imm:
|
||||
; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], 32
|
||||
; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, [[VVAL]]
|
||||
; CHECK: s_mov_b32 [[SGPR_VAL:s[0-9]]], 32
|
||||
; CHECK-NOT: [[SGPR_VAL]]
|
||||
; CHECK: ; use [[SGPR_VAL]]
|
||||
define amdgpu_kernel void @test_readfirstlane_imm(i32 addrspace(1)* %out) #1 {
|
||||
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 32)
|
||||
call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_readfirstlane_imm_fold:
|
||||
; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], 32
|
||||
; CHECK-NOT: [[VVAL]]
|
||||
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VVAL]]
|
||||
define amdgpu_kernel void @test_readfirstlane_imm_fold(i32 addrspace(1)* %out) #1 {
|
||||
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 32)
|
||||
store i32 %readfirstlane, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
|
@ -24,7 +35,7 @@ define amdgpu_kernel void @test_readfirstlane_imm(i32 addrspace(1)* %out) #1 {
|
|||
; CHECK: s_mov_b32 m0, -1
|
||||
; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
|
||||
; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], [[COPY_M0]]
|
||||
; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, [[VVAL]]
|
||||
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VVAL]]
|
||||
define amdgpu_kernel void @test_readfirstlane_m0(i32 addrspace(1)* %out) #1 {
|
||||
%m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
|
||||
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %m0)
|
||||
|
@ -32,5 +43,32 @@ define amdgpu_kernel void @test_readfirstlane_m0(i32 addrspace(1)* %out) #1 {
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_readfirstlane_copy_from_sgpr:
|
||||
; CHECK: ;;#ASMSTART
|
||||
; CHECK-NEXT: s_mov_b32 [[SGPR:s[0-9]+]]
|
||||
; CHECK: ;;#ASMEND
|
||||
; CHECK-NOT: [[SGPR]]
|
||||
; CHECK-NOT: readfirstlane
|
||||
; CHECK: v_mov_b32_e32 [[VCOPY:v[0-9]+]], [[SGPR]]
|
||||
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VCOPY]]
|
||||
define amdgpu_kernel void @test_readfirstlane_copy_from_sgpr(i32 addrspace(1)* %out) #1 {
|
||||
%sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
|
||||
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %sgpr)
|
||||
store i32 %readfirstlane, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; Make sure this doesn't crash.
|
||||
; CHECK-LABEL: {{^}}test_readfirstlane_fi:
|
||||
; CHECK: v_mov_b32_e32 [[FIVAL:v[0-9]]], 4
|
||||
; CHECK: v_readfirstlane_b32 s{{[0-9]+}}, [[FIVAL]]
|
||||
define amdgpu_kernel void @test_readfirstlane_fi(i32 addrspace(1)* %out) #1 {
|
||||
%alloca = alloca i32, addrspace(5)
|
||||
%int = ptrtoint i32 addrspace(5)* %alloca to i32
|
||||
%readfirstlane = call i32 @llvm.amdgcn.readfirstlane(i32 %int)
|
||||
call void asm sideeffect "; use $0", "s"(i32 %readfirstlane)
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind readnone convergent }
|
||||
attributes #1 = { nounwind }
|
||||
|
|
|
@ -1,18 +1,26 @@
|
|||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope %s
|
||||
|
||||
declare i32 @llvm.amdgcn.readlane(i32, i32) #0
|
||||
|
||||
; CHECK-LABEL: {{^}}test_readlane_sreg:
|
||||
; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
|
||||
define amdgpu_kernel void @test_readlane_sreg(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
|
||||
; CHECK-LABEL: {{^}}test_readlane_sreg_sreg:
|
||||
; CHECK-NOT: v_readlane_b32
|
||||
define amdgpu_kernel void @test_readlane_sreg_sreg(i32 %src0, i32 %src1) #1 {
|
||||
%readlane = call i32 @llvm.amdgcn.readlane(i32 %src0, i32 %src1)
|
||||
store i32 %readlane, i32 addrspace(1)* %out, align 4
|
||||
call void asm sideeffect "; use $0", "s"(i32 %readlane)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_readlane_vreg_sreg:
|
||||
; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}
|
||||
define amdgpu_kernel void @test_readlane_vreg_sreg(i32 %src0, i32 %src1) #1 {
|
||||
%vgpr = call i32 asm sideeffect "; def $0", "=v"()
|
||||
%readlane = call i32 @llvm.amdgcn.readlane(i32 %vgpr, i32 %src1)
|
||||
call void asm sideeffect "; use $0", "s"(i32 %readlane)
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_readlane_imm_sreg:
|
||||
; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], 32
|
||||
; CHECK: v_readlane_b32 s{{[0-9]+}}, [[VVAL]], s{{[0-9]+}}
|
||||
; CHECK-NOT: v_readlane_b32
|
||||
define amdgpu_kernel void @test_readlane_imm_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
|
||||
%readlane = call i32 @llvm.amdgcn.readlane(i32 32, i32 %src1)
|
||||
store i32 %readlane, i32 addrspace(1)* %out, align 4
|
||||
|
@ -38,7 +46,7 @@ define amdgpu_kernel void @test_readlane_vregs(i32 addrspace(1)* %out, <2 x i32>
|
|||
; CHECK: s_mov_b32 m0, -1
|
||||
; CHECK: s_mov_b32 [[COPY_M0:s[0-9]+]], m0
|
||||
; CHECK: v_mov_b32_e32 [[VVAL:v[0-9]]], [[COPY_M0]]
|
||||
; CHECK: v_readlane_b32 s{{[0-9]+}}, [[VVAL]], s{{[0-9]+}}
|
||||
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VVAL]]
|
||||
define amdgpu_kernel void @test_readlane_m0_sreg(i32 addrspace(1)* %out, i32 %src1) #1 {
|
||||
%m0 = call i32 asm "s_mov_b32 m0, -1", "={m0}"()
|
||||
%readlane = call i32 @llvm.amdgcn.readlane(i32 %m0, i32 %src1)
|
||||
|
@ -46,14 +54,30 @@ define amdgpu_kernel void @test_readlane_m0_sreg(i32 addrspace(1)* %out, i32 %sr
|
|||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_readlane_imm:
|
||||
; CHECK-LABEL: {{^}}test_readlane_vgpr_imm:
|
||||
; CHECK: v_readlane_b32 s{{[0-9]+}}, v{{[0-9]+}}, 32
|
||||
define amdgpu_kernel void @test_readlane_imm(i32 addrspace(1)* %out, i32 %src0) #1 {
|
||||
%readlane = call i32 @llvm.amdgcn.readlane(i32 %src0, i32 32) #0
|
||||
define amdgpu_kernel void @test_readlane_vgpr_imm(i32 addrspace(1)* %out) #1 {
|
||||
%vgpr = call i32 asm sideeffect "; def $0", "=v"()
|
||||
%readlane = call i32 @llvm.amdgcn.readlane(i32 %vgpr, i32 32) #0
|
||||
store i32 %readlane, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
; CHECK-LABEL: {{^}}test_readlane_copy_from_sgpr:
|
||||
; CHECK: ;;#ASMSTART
|
||||
; CHECK-NEXT: s_mov_b32 [[SGPR:s[0-9]+]]
|
||||
; CHECK: ;;#ASMEND
|
||||
; CHECK-NOT: [[SGPR]]
|
||||
; CHECK-NOT: readlane
|
||||
; CHECK: v_mov_b32_e32 [[VCOPY:v[0-9]+]], [[SGPR]]
|
||||
; CHECK: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VCOPY]]
|
||||
define amdgpu_kernel void @test_readlane_copy_from_sgpr(i32 addrspace(1)* %out) #1 {
|
||||
%sgpr = call i32 asm "s_mov_b32 $0, 0", "=s"()
|
||||
%readfirstlane = call i32 @llvm.amdgcn.readlane(i32 %sgpr, i32 7)
|
||||
store i32 %readfirstlane, i32 addrspace(1)* %out, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.amdgcn.workitem.id.x() #2
|
||||
|
||||
attributes #0 = { nounwind readnone convergent }
|
||||
|
|
Loading…
Reference in New Issue