[OpenCL] Mark kernel arguments as ABI aligned

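Following the discussion on D118229, this marks pointer-typed
kernel arguments as having ABI alignment whenever the pointee type is
complete and has a constant size (so, for example, `void *` arguments
are left unannotated), per section 6.3.5 "Alignment of Types" of the
OpenCL C spec (v3.0.10):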

> For arguments to a __kernel function declared to be a pointer to
> a data type, the OpenCL compiler can assume that the pointee is
> always appropriately aligned as required by the data type.

Differential Revision: https://reviews.llvm.org/D118894
This commit is contained in:
Nikita Popov 2022-02-03 14:46:57 +01:00
parent 997027347d
commit 18834dca2d
6 changed files with 48 additions and 8 deletions

View File

@ -2485,6 +2485,20 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
}
}
// From OpenCL spec v3.0.10 section 6.3.5 Alignment of Types:
// > For arguments to a __kernel function declared to be a pointer to a
// > data type, the OpenCL compiler can assume that the pointee is always
// > appropriately aligned as required by the data type.
if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() &&
ParamType->isPointerType()) {
QualType PTy = ParamType->getPointeeType();
if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
llvm::Align Alignment =
getNaturalPointeeTypeAlignment(ParamType).getAsAlign();
Attrs.addAlignmentAttr(Alignment);
}
}
switch (FI.getExtParameterInfo(ArgNo).getABI()) {
case ParameterABI::Ordinary:
break;

View File

@ -1,6 +1,6 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly %out)
// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly align 4 %out)
// CHECK: store i32 4, i32 addrspace(1)* %out, align 4
kernel void test_kernel(global int *out)

View File

@ -34,7 +34,7 @@ void callee(int id, __global int *out) {
out[id] = id;
}
// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i)
// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* align 4 %{{.*}}, i32 addrspace(1)* align 4 %b, i32 %i)
kernel void device_side_enqueue(global int *a, global int *b, int i) {
// COMMON: %default_queue = alloca %opencl.queue_t*
queue_t default_queue;

View File

@ -0,0 +1,26 @@
// RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s
// Test that pointer arguments to kernels are assumed to be ABI aligned.
// A struct whose alignment is forced down to 1 by packed/aligned(1), even
// though its only member is an int. The expectation for the `p` argument of
// test() below is therefore `align 1` rather than 4.
struct __attribute__((packed, aligned(1))) packed {
int i32;
};
typedef __attribute__((ext_vector_type(4))) int int4;
typedef __attribute__((ext_vector_type(2))) float float2;
// Kernel taking one pointer parameter per pointee type of interest. Each
// pointer is expected to carry an `align` parameter attribute matching its
// pointee type; the per-parameter expectations are listed in the body.
kernel void test(
global int *i32,
global long *i64,
global int4 *v4i32,
global float2 *v2f32,
global void *v,
global struct packed *p) {
// CHECK-LABEL: spir_kernel void @test(
// CHECK-SAME: i32* nocapture noundef align 4 %i32,
// CHECK-SAME: i64* nocapture noundef align 8 %i64,
// CHECK-SAME: <4 x i32>* nocapture noundef align 16 %v4i32,
// CHECK-SAME: <2 x float>* nocapture noundef align 8 %v2f32,
// `void` is an incomplete type, so no align attribute is expected on %v.
// CHECK-SAME: i8* nocapture noundef %v,
// The packed struct is declared with aligned(1), hence `align 1` here.
// CHECK-SAME: %struct.packed* nocapture noundef align 1 %p)
}

View File

@ -28,7 +28,7 @@ kernel void test_single(int_single input, global int* output) {
// CHECK: spir_kernel
// AMDGCN: define{{.*}} amdgpu_kernel void @test_single
// CHECK: struct.int_single* nocapture {{.*}} byval(%struct.int_single)
// CHECK: i32* nocapture noundef writeonly %output
// CHECK: i32* nocapture noundef writeonly align 4 %output
output[0] = input.a;
}
@ -36,7 +36,7 @@ kernel void test_pair(int_pair input, global int* output) {
// CHECK: spir_kernel
// AMDGCN: define{{.*}} amdgpu_kernel void @test_pair
// CHECK: struct.int_pair* nocapture {{.*}} byval(%struct.int_pair)
// CHECK: i32* nocapture noundef writeonly %output
// CHECK: i32* nocapture noundef writeonly align 4 %output
output[0] = (int)input.a;
output[1] = (int)input.b;
}
@ -45,7 +45,7 @@ kernel void test_kernel(test_struct input, global int* output) {
// CHECK: spir_kernel
// AMDGCN: define{{.*}} amdgpu_kernel void @test_kernel
// CHECK: struct.test_struct* nocapture {{.*}} byval(%struct.test_struct)
// CHECK: i32* nocapture noundef writeonly %output
// CHECK: i32* nocapture noundef writeonly align 4 %output
output[0] = input.elementA;
output[1] = input.elementB;
output[2] = (int)input.elementC;

View File

@ -5,14 +5,14 @@ int get_dummy_id(int D);
kernel void bar(global int *A);
kernel void foo(global int *A)
// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef %A)
// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef align 4 %A)
{
int id = get_dummy_id(0);
// CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
A[id] = id;
bar(A);
// CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef %A)
// CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef align 4 %A)
}
// CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef)
// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef align 4)