forked from OSchip/llvm-project
parent
988df63525
commit
65e43cade8
|
@ -1,11 +0,0 @@
|
|||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s
|
||||
; Just check that the target feature and data layout are accepted without error.
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
|
||||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
; Trivial function that only returns. Its body is not inspected: the RUN line
; of this test pipes llc's output nowhere, so only a clean llc run matters.
define void @foo() {
|
||||
entry:
|
||||
ret void
|
||||
}
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
; RUN: llc -march=amdgcn -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s
|
||||
; Just check that the target feature and data layout are accepted without error.
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
|
||||
target triple = "amdgcn-amd-amdhsa"
|
||||
|
||||
; Trivial function that only returns. Its body is not inspected: the RUN line
; of this test pipes llc's output nowhere, so only a clean llc run matters.
define void @foo() {
|
||||
entry:
|
||||
ret void
|
||||
}
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
|
||||
|
||||
; CHECK-LABEL: atomic_fence
|
||||
; CHECK: %bb.0:
|
||||
; CHECK-NOT: ATOMIC_FENCE
|
||||
; CHECK-NEXT: s_waitcnt vmcnt(0)
|
||||
; CHECK-NEXT: buffer_wbinvl1_vol
|
||||
; CHECK-NEXT: s_endpgm
|
||||
; Kernel whose entire body is a single acquire fence followed by a return.
; The check lines before this function expect the output to contain
; s_waitcnt vmcnt(0), buffer_wbinvl1_vol and s_endpgm on consecutive lines,
; with no ATOMIC_FENCE text left ahead of them in the entry block.
define amdgpu_kernel void @atomic_fence() {
|
||||
fence acquire
|
||||
ret void
|
||||
}
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
; RUN: llc -mtriple=amdgcn-- -mcpu=kaveri -verify-machineinstrs < %s | FileCheck %s
|
||||
;
|
||||
; The original OpenCL kernel:
|
||||
; kernel void f(global int *a, int i, int j) {
|
||||
; int x[100];
|
||||
; x[i] = 7;
|
||||
; a[0] = x[j];
|
||||
; }
|
||||
; clang -cc1 -triple amdgcn--cl -emit-llvm -o -
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
|
||||
|
||||
; IR for the OpenCL kernel quoted in this test's header comment: write 7 to
; x[i] in a 100-element i32 stack array, then copy x[j] to a[0].
; The check lines interleaved with the body expect both array accesses to be
; emitted as buffer_store_dword / buffer_load_dword with the offen modifier,
; using the s[8:11] descriptor built from SCRATCH_RSRC_DWORD0/1.
define amdgpu_kernel void @f(i32 addrspace(1)* nocapture %a, i32 %i, i32 %j) local_unnamed_addr #0 {
|
||||
entry:
|
||||
; CHECK: s_load_dwordx2 s[4:5], s[0:1], 0x9
|
||||
; CHECK: s_load_dwordx2 s[0:1], s[0:1], 0xb
|
||||
; CHECK: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
|
||||
; CHECK: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
|
||||
; CHECK: s_mov_b32 s10, -1
|
||||
; CHECK: v_mov_b32_e32 v0, 4
|
||||
; CHECK: s_waitcnt lgkmcnt(0)
|
||||
; CHECK: s_lshl_b32 s0, s0, 2
|
||||
; CHECK: v_add_i32_e32 v1, vcc, s0, v0
|
||||
; CHECK: s_lshl_b32 s0, s1, 2
|
||||
; CHECK: s_mov_b32 s11, 0xe8f000
|
||||
; CHECK: v_mov_b32_e32 v2, 7
|
||||
; CHECK: buffer_store_dword v2, v1, s[8:11], s3 offen
|
||||
; CHECK: v_add_i32_e32 v0, vcc, s0, v0
|
||||
; CHECK: s_mov_b32 s7, 0xf000
|
||||
; CHECK: s_mov_b32 s6, -1
|
||||
; CHECK: buffer_load_dword v0, v0, s[8:11], s3 offen
|
||||
; CHECK: s_waitcnt vmcnt(0)
|
||||
; CHECK: buffer_store_dword v0, off, s[4:7], 0
|
||||
; CHECK: s_endpgm
|
||||

||||
; The array lives in addrspace(5), matching the A5 alloca address space in the datalayout string.
%x = alloca [100 x i32], align 4, addrspace(5)
|
||||
%alloca.bc = bitcast [100 x i32] addrspace(5)* %x to i8 addrspace(5)*
|
||||
; Lifetime size 400 = 100 elements * 4 bytes per i32.
call void @llvm.lifetime.start.p5i8(i64 400, i8 addrspace(5)* nonnull %alloca.bc) #0
|
||||
; x[i] = 7
%arrayidx = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %i
|
||||
store i32 7, i32 addrspace(5)* %arrayidx, align 4
|
||||
; a[0] = x[j]
%arrayidx2 = getelementptr inbounds [100 x i32], [100 x i32] addrspace(5)* %x, i32 0, i32 %j
|
||||
%ld = load i32, i32 addrspace(5)* %arrayidx2, align 4
|
||||
store i32 %ld, i32 addrspace(1)* %a, align 4
|
||||
call void @llvm.lifetime.end.p5i8(i64 400, i8 addrspace(5)* nonnull %alloca.bc) #0
|
||||
ret void
|
||||
}
|
||||
|
||||
; Lifetime marker intrinsics taking an i64 size and an i8 addrspace(5) pointer;
; @f calls them around its uses of %x.
declare void @llvm.lifetime.start.p5i8(i64, i8 addrspace(5)* nocapture) #1
|
||||
|
||||
declare void @llvm.lifetime.end.p5i8(i64, i8 addrspace(5)* nocapture) #1
|
||||
|
||||
; Group #0 is attached to @f and to its two lifetime calls; group #1 is
; attached to the two intrinsic declarations.
attributes #0 = { nounwind }
|
||||
attributes #1 = { argmemonly nounwind }
|
|
@ -1,24 +0,0 @@
|
|||
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
|
||||
|
||||
target datalayout = "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"
|
||||
|
||||
; GCN-LABEL: @test_unaligned_load
|
||||
; GCN: buffer_load_dword
|
||||
; GCN-NOT: flat_load_dword
|
||||
; Loads a <16 x double> through an addrspace(5) pointer built from the integer
; argument %i, using only 8-byte alignment, then stores the value to %results
; with 128-byte alignment. The check lines before this function require a
; buffer_load_dword and forbid any flat_load_dword after it.
define amdgpu_kernel void @test_unaligned_load(<16 x double> addrspace(1)* %results, i32 %i) {
|
||||
entry:
|
||||
%a = inttoptr i32 %i to <16 x double> addrspace(5)*
|
||||
%v = load <16 x double>, <16 x double> addrspace(5)* %a, align 8
|
||||
store <16 x double> %v, <16 x double> addrspace(1)* %results, align 128
|
||||
ret void
|
||||
}
|
||||
|
||||
; GCN-LABEL: @test_unaligned_store
|
||||
; GCN: buffer_store_dword
|
||||
; GCN-NOT: flat_store_dword
|
||||
; Stores the <16 x double> argument %v through an addrspace(5) pointer built
; from the integer argument %i, using only 8-byte alignment. The check lines
; before this function require a buffer_store_dword and forbid any
; flat_store_dword after it.
define amdgpu_kernel void @test_unaligned_store(<16 x double> %v, i32 %i) {
|
||||
entry:
|
||||
%a = inttoptr i32 %i to <16 x double> addrspace(5)*
|
||||
store <16 x double> %v, <16 x double> addrspace(5)* %a, align 8
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue