[LSR] Fix for pre-indexed generated constant offset
This patch changes the isLegalUse check so that LSRInstance::GenerateConstantOffsetsImpl only generates offsets that result in a legal addressing mode and a legal formula. The check now mirrors the assert used to reject illegal formulas.

Differential Revision: https://reviews.llvm.org/D100383
Change-Id: Iffb9e32d59df96b8f072c00f6c339108159a009a
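The heart of the change is which offset range gets validated against the target. Below is a minimal standalone C++ model of that range check, not LLVM's API: the 338-byte step is borrowed from the new AArch64 test further down, the use offset of 676 is invented for illustration, and the signed 9-bit range matches AArch64's pre-indexed immediate encoding. It shows how a range pre-shifted by the step can approve an immediate the final formula never actually uses:

#include <cstdint>
#include <iostream>

// AArch64 pre-indexed loads/stores encode a signed 9-bit writeback immediate.
static bool isLegalPreIndexImm(int64_t Imm) { return Imm >= -256 && Imm <= 255; }

// Shape of LSR's range check: the formula's BaseOffset combined with both
// ends of the use's offset range must be a representable immediate.
static bool isLegalUseModel(int64_t BaseOffset, int64_t MinOffset,
                            int64_t MaxOffset) {
  return isLegalPreIndexImm(BaseOffset + MinOffset) &&
         isLegalPreIndexImm(BaseOffset + MaxOffset);
}

int main() {
  const int64_t Step = 338;            // row stride from the AArch64 test below
  const int64_t BaseOffset = 0 - Step; // F.BaseOffset = Base.BaseOffset - Offset
  const int64_t UseOffset = 676;       // hypothetical use offset (2 * Step)

  // Old arguments: the use range was shifted by Step as well, so the immediate
  // actually tested is 676 - 338 - 338 = 0, which looks foldable.
  std::cout << isLegalUseModel(BaseOffset, UseOffset - Step, UseOffset - Step)
            << '\n'; // prints 1
  // Fixed arguments: the immediate tested is 676 - 338 = 338, which does not
  // fit in a signed 9-bit immediate, so the formula is rejected.
  std::cout << isLegalUseModel(BaseOffset, UseOffset, UseOffset)
            << '\n'; // prints 0
}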
commit bf147c4653 (parent 17cec07184)
@@ -3792,8 +3792,7 @@ void LSRInstance::GenerateConstantOffsetsImpl(
     Formula F = Base;
     F.BaseOffset = (uint64_t)Base.BaseOffset - Offset;
 
-    if (isLegalUse(TTI, LU.MinOffset - Offset, LU.MaxOffset - Offset, LU.Kind,
-                   LU.AccessTy, F)) {
+    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
       // Add the offset to the base register.
       const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), Offset), G);
       // If it cancelled out, drop the base register, otherwise update it.
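A note on why this matters (an inference from the snippet, not from the review thread): F.BaseOffset has already been decreased by Offset, and the legality helpers fold a formula's BaseOffset into the use's offset bounds before querying the target (on AArch64 this ultimately reaches TargetTransformInfo::isLegalAddressingMode). Passing LU.MinOffset - Offset and LU.MaxOffset - Offset therefore shifted the tested immediates by Offset a second time, so the combination the final code actually uses, F.BaseOffset + LU.MinOffset through F.BaseOffset + LU.MaxOffset, was never validated. The new call checks exactly that range, matching the assert that later rejects illegal formulas.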
@@ -2,7 +2,7 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs -O0 < %s
 
 ; GCN-LABEL: {{^}}test_loop:
-; GCN: s_and_b64 vcc, exec, -1
+; GCN: s_and_b64 s[0:1], exec, -1
 ; GCN: [[LABEL:BB[0-9]+_[0-9]+]]: ; %for.body{{$}}
 ; GCN: ds_read_b32
 ; GCN: ds_write_b32
@@ -97,10 +97,10 @@ for.body:
 ; GCN-LABEL: {{^}}loop_arg_0:
 ; GCN: v_and_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
 ; GCN: v_cmp_eq_u32{{[^,]*}}, 1,
+; GCN: s_add_i32 s2, s0, 0x80
 
 ; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]
-; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80
-; GCN: s_add_i32 s{{[0-9]+}}, s{{[0-9]+}}, 4
+; GCN: v_add_i32_e32 v0, vcc, 4, v0
 
 ; GCN: s_cbranch_{{vccz|vccnz}} [[LOOPBB]]
 ; GCN-NEXT: ; %bb.2
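In loop_arg_0, the updated expectations show the 0x80 byte offset folded into a single s_add_i32 ahead of the loop label, leaving only the 4-byte element step inside the loop body.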
@@ -0,0 +1,53 @@
+; RUN: llc -mtriple=aarch64-none-eabi -lsr-preferred-addressing-mode=preindexed %s -o - | FileCheck %s
+
+; In LSR for constant offsets and steps, we can generate pre-inc
+; accesses by having the offset equal the step and generate a reuse
+; formula. However, there are cases where the step results in an
+; illegal addressing mode.
+
+; In this test, we set the preferred addressing mode to be preindexed,
+; in order to test a scenario where the step results in an illegal
+; addressing mode and because of that it should not generate a reuse formula.
+
+; This test was created in order to reproduce a bug that was observed
+; when building a bootstrap build on an AArch64 machine, where the
+; preferred addressing mode is preindexed.
+
+
+%"Type" = type <{[166 x [338 x i8]]}>
+define void @test_lsr_pre_inc_offset_check(%"Type"* %p) {
+; CHECK-LABEL: test_lsr_pre_inc_offset_check:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: add x8, x0, #340 // =340
+; CHECK-NEXT: mov w9, #165
+; CHECK-NEXT: mov w10, #2
+; CHECK-NEXT: .LBB0_1: // %main
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: stur wzr, [x8, #-1]
+; CHECK-NEXT: strb w10, [x8]
+; CHECK-NEXT: subs x9, x9, #1 // =1
+; CHECK-NEXT: add x8, x8, #338 // =338
+; CHECK-NEXT: b.ne .LBB0_1
+; CHECK-NEXT: // %bb.2: // %exit
+; CHECK-NEXT: ret
+entry:
+  br label %main
+exit:
+  ret void
+if.then:
+  %arrayidx.i = getelementptr inbounds %"Type", %"Type"* %p, i64 0, i32 0, i64 %indvars, i64 1
+  %0 = bitcast i8* %arrayidx.i to i32*
+  store i32 0, i32* %0, align 1
+  br label %if.end
+if.end:
+  %arrayidx.p = getelementptr inbounds %"Type", %"Type"* %p, i64 0, i32 0, i64 %indvars, i64 2
+  store i8 2, i8* %arrayidx.p, align 1
+  %indvars.iv.next = add nuw nsw i64 %indvars, 1
+  %add.i = add nuw i8 %begin, 1
+  %cmp.i.not = icmp eq i64 %indvars.iv.next, 166
+  br i1 %cmp.i.not, label %exit, label %main
+main:
+  %begin = phi i8 [ 1, %entry ], [ %add.i, %if.end ]
+  %indvars = phi i64 [ 1, %entry ], [ %indvars.iv.next, %if.end ]
+  br label %if.then
+}
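For readers, a piece of context not stated in the patch itself: AArch64 pre- and post-indexed loads and stores encode a signed 9-bit writeback immediate, so only steps in [-256, 255] can fold into the access. The 338-byte row stride of %"Type" cannot, which is why the reuse formula must be rejected and the generated loop keeps the standalone add x8, x8, #338 visible in the CHECK lines. The RUN line above reproduces the codegen directly with llc -mtriple=aarch64-none-eabi -lsr-preferred-addressing-mode=preindexed.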
@@ -6,16 +6,17 @@ target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:3
 
 ; OPT-LABEL: @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(
-; OPT-NOT: getelementptr
+; OPT: .lr.ph.preheader:
+; OPT: %scevgep2 = getelementptr i32, i32 addrspace(3)* %arg1, i32 16383
+; OPT: br label %.lr.ph
 ; OPT: .lr.ph:
-; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
+; OPT: %lsr.iv3 = phi i32 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
 ; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
 ; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
-; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383
-; OPT: %tmp4 = atomicrmw add i32 addrspace(3)* %scevgep4, i32 undef seq_cst
-; OPT: %tmp7 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 undef seq_cst
-; OPT: %0 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 %tmp8 seq_cst
+; OPT: %tmp4 = atomicrmw add i32 addrspace(3)* %lsr.iv3, i32 undef seq_cst, align 4
+; OPT: %tmp7 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 undef seq_cst, align 4
+; OPT: %0 = atomicrmw add i32 addrspace(3)* %lsr.iv1, i32 %tmp8 seq_cst, align 4
+; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv3, i32 1
 ; OPT: br i1 %exitcond
 define amdgpu_kernel void @test_local_atomicrmw_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
 bb:
@@ -46,14 +47,16 @@ bb:
 }
 
 ; OPT-LABEL: test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(
-; OPT-NOT: getelementptr
+; OPT: .lr.ph.preheader:
+; OPT: %scevgep2 = getelementptr i32, i32 addrspace(3)* %arg1, i32 16383
+; OPT: br label %.lr.ph
 ; OPT: .lr.ph:
-; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
+; OPT: %lsr.iv3 = phi i32 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
 ; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
 ; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
-; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383
-; OPT: %tmp4 = cmpxchg i32 addrspace(3)* %scevgep4, i32 undef, i32 undef seq_cst monotonic
+; OPT: %tmp4 = cmpxchg i32 addrspace(3)* %lsr.iv3, i32 undef, i32 undef seq_cst monotonic, align 4
+; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv3, i32 1
 define amdgpu_kernel void @test_local_cmpxchg_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
 bb:
   %tmp = icmp sgt i32 %n, 0
@@ -85,15 +88,17 @@ bb:
 }
 
 ; OPT-LABEL: @test_local_atomicinc_addressing_loop_uniform_index_max_offset_i32(
-; OPT-NOT: getelementptr
+; OPT: .lr.ph.preheader:
+; OPT: %scevgep2 = getelementptr i32, i32 addrspace(3)* %arg1, i32 16383
+; OPT: br label %.lr.ph
 ; OPT: .lr.ph:
-; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
+; OPT: %lsr.iv3 = phi i32 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
 ; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
 ; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
-; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383
-; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %scevgep4, i32 undef, i32 0, i32 0, i1 false)
+; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %lsr.iv3, i32 undef, i32 0, i32 0, i1 false)
 ; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
+; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv3, i32 1
 define amdgpu_kernel void @test_local_atomicinc_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
 bb:
   %tmp = icmp sgt i32 %n, 0
@@ -123,15 +128,16 @@ bb:
 }
 
 ; OPT-LABEL: @test_local_atomicdec_addressing_loop_uniform_index_max_offset_i32(
-; OPT-NOT: getelementptr
+; OPT: .lr.ph.preheader:
+; OPT: %scevgep2 = getelementptr i32, i32 addrspace(3)* %arg1, i32 16383
+; OPT: br label %.lr.ph
 ; OPT: .lr.ph:
-; OPT: %lsr.iv2 = phi i32 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
+; OPT: %lsr.iv3 = phi i32 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
 ; OPT: %lsr.iv1 = phi i32 addrspace(3)* [ %scevgep, %.lr.ph ], [ %arg0, %.lr.ph.preheader ]
 ; OPT: %lsr.iv = phi i32 [ %lsr.iv.next, %.lr.ph ], [ %n, %.lr.ph.preheader ]
-; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv2, i32 16383
-; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %scevgep4, i32 undef, i32 0, i32 0, i1 false)
+; OPT: %tmp4 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %lsr.iv3, i32 undef, i32 0, i32 0, i1 false)
 ; OPT: %tmp7 = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %lsr.iv1, i32 undef, i32 0, i32 0, i1 false)
+; OPT: %scevgep4 = getelementptr i32, i32 addrspace(3)* %lsr.iv3, i32 1
 define amdgpu_kernel void @test_local_atomicdec_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(3)* noalias nocapture %arg0, i32 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
 bb:
   %tmp = icmp sgt i32 %n, 0
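Taken together, the four atomics expectations now encode the same shape: the 16383-element offset is applied once in the preheader (%scevgep2), the loop phi %lsr.iv3 carries the offset pointer across iterations, and the in-loop %scevgep4 advances it by a single element per iteration.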
@@ -6,10 +6,13 @@
 target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
 
 ; OPT-LABEL: @test_global_addressing_loop_uniform_index_max_offset_i32(
+; OPT: .lr.ph.preheader:
+; OPT: %scevgep2 = getelementptr i8, i8 addrspace(1)* %arg1, i64 4095
+; OPT: br label %.lr.ph
 ; OPT: {{^}}.lr.ph:
-; OPT: %lsr.iv2 = phi i8 addrspace(1)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
-; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv2, i64 4095
-; OPT: load i8, i8 addrspace(1)* %scevgep4, align 1
+; OPT: %lsr.iv3 = phi i8 addrspace(1)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
+; OPT: load i8, i8 addrspace(1)* %lsr.iv3, align 1
+; OPT: %scevgep4 = getelementptr i8, i8 addrspace(1)* %lsr.iv3, i64 1
 define amdgpu_kernel void @test_global_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(1)* noalias nocapture readonly %arg1, i32 %n) #0 {
 bb:
   %tmp = icmp sgt i32 %n, 0
@@ -79,10 +82,13 @@ bb:
 }
 
 ; OPT-LABEL: @test_local_addressing_loop_uniform_index_max_offset_i32(
+; OPT: .lr.ph.preheader:
+; OPT: %scevgep2 = getelementptr i8, i8 addrspace(3)* %arg1, i32 65535
+; OPT: br label %.lr.ph
 ; OPT: {{^}}.lr.ph
-; OPT: %lsr.iv2 = phi i8 addrspace(3)* [ %scevgep3, %.lr.ph ], [ %arg1, %.lr.ph.preheader ]
-; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv2, i32 65535
-; OPT: %tmp4 = load i8, i8 addrspace(3)* %scevgep4, align 1
+; OPT: %lsr.iv3 = phi i8 addrspace(3)* [ %scevgep4, %.lr.ph ], [ %scevgep2, %.lr.ph.preheader ]
+; OPT: %tmp4 = load i8, i8 addrspace(3)* %lsr.iv3, align 1
+; OPT: %scevgep4 = getelementptr i8, i8 addrspace(3)* %lsr.iv3, i32 1
 define amdgpu_kernel void @test_local_addressing_loop_uniform_index_max_offset_i32(i32 addrspace(1)* noalias nocapture %arg0, i8 addrspace(3)* noalias nocapture readonly %arg1, i32 %n) #0 {
 bb:
   %tmp = icmp sgt i32 %n, 0