forked from OSchip/llvm-project
[OpenCL] Mark kernel arguments as ABI aligned
Following the discussion on D118229, this marks all pointer-typed kernel arguments as having ABI alignment, per section 6.3.5 of the OpenCL spec:

> For arguments to a __kernel function declared to be a pointer to
> a data type, the OpenCL compiler can assume that the pointee is
> always appropriately aligned as required by the data type.

Differential Revision: https://reviews.llvm.org/D118894
This commit is contained in:
parent
997027347d
commit
18834dca2d
|
@ -2485,6 +2485,20 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
|
|||
}
|
||||
}
|
||||
|
||||
// From OpenCL spec v3.0.10 section 6.3.5 Alignment of Types:
|
||||
// > For arguments to a __kernel function declared to be a pointer to a
|
||||
// > data type, the OpenCL compiler can assume that the pointee is always
|
||||
// > appropriately aligned as required by the data type.
|
||||
if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() &&
|
||||
ParamType->isPointerType()) {
|
||||
QualType PTy = ParamType->getPointeeType();
|
||||
if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
|
||||
llvm::Align Alignment =
|
||||
getNaturalPointeeTypeAlignment(ParamType).getAsAlign();
|
||||
Attrs.addAlignmentAttr(Alignment);
|
||||
}
|
||||
}
|
||||
|
||||
switch (FI.getExtParameterInfo(ArgNo).getABI()) {
|
||||
case ParameterABI::Ordinary:
|
||||
break;
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
// REQUIRES: amdgpu-registered-target
|
||||
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
|
||||
// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly %out)
|
||||
// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly align 4 %out)
|
||||
// CHECK: store i32 4, i32 addrspace(1)* %out, align 4
|
||||
|
||||
kernel void test_kernel(global int *out)
|
||||
|
|
|
@ -34,7 +34,7 @@ void callee(int id, __global int *out) {
|
|||
out[id] = id;
|
||||
}
|
||||
|
||||
// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i)
|
||||
// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* align 4 %{{.*}}, i32 addrspace(1)* align 4 %b, i32 %i)
|
||||
kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
||||
// COMMON: %default_queue = alloca %opencl.queue_t*
|
||||
queue_t default_queue;
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
// RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s
|
||||
|
||||
// Test that pointer arguments to kernels are assumed to be ABI aligned.
|
||||
|
||||
struct __attribute__((packed, aligned(1))) packed {
|
||||
int i32;
|
||||
};
|
||||
|
||||
typedef __attribute__((ext_vector_type(4))) int int4;
|
||||
typedef __attribute__((ext_vector_type(2))) float float2;
|
||||
|
||||
kernel void test(
|
||||
global int *i32,
|
||||
global long *i64,
|
||||
global int4 *v4i32,
|
||||
global float2 *v2f32,
|
||||
global void *v,
|
||||
global struct packed *p) {
|
||||
// CHECK-LABEL: spir_kernel void @test(
|
||||
// CHECK-SAME: i32* nocapture noundef align 4 %i32,
|
||||
// CHECK-SAME: i64* nocapture noundef align 8 %i64,
|
||||
// CHECK-SAME: <4 x i32>* nocapture noundef align 16 %v4i32,
|
||||
// CHECK-SAME: <2 x float>* nocapture noundef align 8 %v2f32,
|
||||
// CHECK-SAME: i8* nocapture noundef %v,
|
||||
// CHECK-SAME: %struct.packed* nocapture noundef align 1 %p)
|
||||
}
|
|
@ -28,7 +28,7 @@ kernel void test_single(int_single input, global int* output) {
|
|||
// CHECK: spir_kernel
|
||||
// AMDGCN: define{{.*}} amdgpu_kernel void @test_single
|
||||
// CHECK: struct.int_single* nocapture {{.*}} byval(%struct.int_single)
|
||||
// CHECK: i32* nocapture noundef writeonly %output
|
||||
// CHECK: i32* nocapture noundef writeonly align 4 %output
|
||||
output[0] = input.a;
|
||||
}
|
||||
|
||||
|
@ -36,7 +36,7 @@ kernel void test_pair(int_pair input, global int* output) {
|
|||
// CHECK: spir_kernel
|
||||
// AMDGCN: define{{.*}} amdgpu_kernel void @test_pair
|
||||
// CHECK: struct.int_pair* nocapture {{.*}} byval(%struct.int_pair)
|
||||
// CHECK: i32* nocapture noundef writeonly %output
|
||||
// CHECK: i32* nocapture noundef writeonly align 4 %output
|
||||
output[0] = (int)input.a;
|
||||
output[1] = (int)input.b;
|
||||
}
|
||||
|
@ -45,7 +45,7 @@ kernel void test_kernel(test_struct input, global int* output) {
|
|||
// CHECK: spir_kernel
|
||||
// AMDGCN: define{{.*}} amdgpu_kernel void @test_kernel
|
||||
// CHECK: struct.test_struct* nocapture {{.*}} byval(%struct.test_struct)
|
||||
// CHECK: i32* nocapture noundef writeonly %output
|
||||
// CHECK: i32* nocapture noundef writeonly align 4 %output
|
||||
output[0] = input.elementA;
|
||||
output[1] = input.elementB;
|
||||
output[2] = (int)input.elementC;
|
||||
|
|
|
@ -5,14 +5,14 @@ int get_dummy_id(int D);
|
|||
kernel void bar(global int *A);
|
||||
|
||||
kernel void foo(global int *A)
|
||||
// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef %A)
|
||||
// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef align 4 %A)
|
||||
{
|
||||
int id = get_dummy_id(0);
|
||||
// CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
|
||||
A[id] = id;
|
||||
bar(A);
|
||||
// CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef %A)
|
||||
// CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef align 4 %A)
|
||||
}
|
||||
|
||||
// CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
|
||||
// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef)
|
||||
// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef align 4)
|
||||
|
|
Loading…
Reference in New Issue