forked from OSchip/llvm-project
[OpenCL] Mark kernel arguments as ABI aligned
Following the discussion on D118229, this marks all pointer-typed kernel arguments as having ABI alignment, per section 6.3.5 of the OpenCL spec:

> For arguments to a __kernel function declared to be a pointer to
> a data type, the OpenCL compiler can assume that the pointee is
> always appropriately aligned as required by the data type.

Differential Revision: https://reviews.llvm.org/D118894
This commit is contained in:
parent
997027347d
commit
18834dca2d
|
@ -2485,6 +2485,20 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// From OpenCL spec v3.0.10 section 6.3.5 Alignment of Types:
|
||||||
|
// > For arguments to a __kernel function declared to be a pointer to a
|
||||||
|
// > data type, the OpenCL compiler can assume that the pointee is always
|
||||||
|
// > appropriately aligned as required by the data type.
|
||||||
|
if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() &&
|
||||||
|
ParamType->isPointerType()) {
|
||||||
|
QualType PTy = ParamType->getPointeeType();
|
||||||
|
if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
|
||||||
|
llvm::Align Alignment =
|
||||||
|
getNaturalPointeeTypeAlignment(ParamType).getAsAlign();
|
||||||
|
Attrs.addAlignmentAttr(Alignment);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
switch (FI.getExtParameterInfo(ArgNo).getABI()) {
|
switch (FI.getExtParameterInfo(ArgNo).getABI()) {
|
||||||
case ParameterABI::Ordinary:
|
case ParameterABI::Ordinary:
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
// REQUIRES: amdgpu-registered-target
|
// REQUIRES: amdgpu-registered-target
|
||||||
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
|
// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
|
||||||
// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly %out)
|
// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly align 4 %out)
|
||||||
// CHECK: store i32 4, i32 addrspace(1)* %out, align 4
|
// CHECK: store i32 4, i32 addrspace(1)* %out, align 4
|
||||||
|
|
||||||
kernel void test_kernel(global int *out)
|
kernel void test_kernel(global int *out)
|
||||||
|
|
|
@ -34,7 +34,7 @@ void callee(int id, __global int *out) {
|
||||||
out[id] = id;
|
out[id] = id;
|
||||||
}
|
}
|
||||||
|
|
||||||
// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i)
|
// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* align 4 %{{.*}}, i32 addrspace(1)* align 4 %b, i32 %i)
|
||||||
kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
kernel void device_side_enqueue(global int *a, global int *b, int i) {
|
||||||
// COMMON: %default_queue = alloca %opencl.queue_t*
|
// COMMON: %default_queue = alloca %opencl.queue_t*
|
||||||
queue_t default_queue;
|
queue_t default_queue;
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
// RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s
|
||||||
|
|
||||||
|
// Test that pointer arguments to kernels are assumed to be ABI aligned.
|
||||||
|
|
||||||
|
struct __attribute__((packed, aligned(1))) packed {
|
||||||
|
int i32;
|
||||||
|
};
|
||||||
|
|
||||||
|
typedef __attribute__((ext_vector_type(4))) int int4;
|
||||||
|
typedef __attribute__((ext_vector_type(2))) float float2;
|
||||||
|
|
||||||
|
kernel void test(
|
||||||
|
global int *i32,
|
||||||
|
global long *i64,
|
||||||
|
global int4 *v4i32,
|
||||||
|
global float2 *v2f32,
|
||||||
|
global void *v,
|
||||||
|
global struct packed *p) {
|
||||||
|
// CHECK-LABEL: spir_kernel void @test(
|
||||||
|
// CHECK-SAME: i32* nocapture noundef align 4 %i32,
|
||||||
|
// CHECK-SAME: i64* nocapture noundef align 8 %i64,
|
||||||
|
// CHECK-SAME: <4 x i32>* nocapture noundef align 16 %v4i32,
|
||||||
|
// CHECK-SAME: <2 x float>* nocapture noundef align 8 %v2f32,
|
||||||
|
// CHECK-SAME: i8* nocapture noundef %v,
|
||||||
|
// CHECK-SAME: %struct.packed* nocapture noundef align 1 %p)
|
||||||
|
}
|
|
@ -28,7 +28,7 @@ kernel void test_single(int_single input, global int* output) {
|
||||||
// CHECK: spir_kernel
|
// CHECK: spir_kernel
|
||||||
// AMDGCN: define{{.*}} amdgpu_kernel void @test_single
|
// AMDGCN: define{{.*}} amdgpu_kernel void @test_single
|
||||||
// CHECK: struct.int_single* nocapture {{.*}} byval(%struct.int_single)
|
// CHECK: struct.int_single* nocapture {{.*}} byval(%struct.int_single)
|
||||||
// CHECK: i32* nocapture noundef writeonly %output
|
// CHECK: i32* nocapture noundef writeonly align 4 %output
|
||||||
output[0] = input.a;
|
output[0] = input.a;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ kernel void test_pair(int_pair input, global int* output) {
|
||||||
// CHECK: spir_kernel
|
// CHECK: spir_kernel
|
||||||
// AMDGCN: define{{.*}} amdgpu_kernel void @test_pair
|
// AMDGCN: define{{.*}} amdgpu_kernel void @test_pair
|
||||||
// CHECK: struct.int_pair* nocapture {{.*}} byval(%struct.int_pair)
|
// CHECK: struct.int_pair* nocapture {{.*}} byval(%struct.int_pair)
|
||||||
// CHECK: i32* nocapture noundef writeonly %output
|
// CHECK: i32* nocapture noundef writeonly align 4 %output
|
||||||
output[0] = (int)input.a;
|
output[0] = (int)input.a;
|
||||||
output[1] = (int)input.b;
|
output[1] = (int)input.b;
|
||||||
}
|
}
|
||||||
|
@ -45,7 +45,7 @@ kernel void test_kernel(test_struct input, global int* output) {
|
||||||
// CHECK: spir_kernel
|
// CHECK: spir_kernel
|
||||||
// AMDGCN: define{{.*}} amdgpu_kernel void @test_kernel
|
// AMDGCN: define{{.*}} amdgpu_kernel void @test_kernel
|
||||||
// CHECK: struct.test_struct* nocapture {{.*}} byval(%struct.test_struct)
|
// CHECK: struct.test_struct* nocapture {{.*}} byval(%struct.test_struct)
|
||||||
// CHECK: i32* nocapture noundef writeonly %output
|
// CHECK: i32* nocapture noundef writeonly align 4 %output
|
||||||
output[0] = input.elementA;
|
output[0] = input.elementA;
|
||||||
output[1] = input.elementB;
|
output[1] = input.elementB;
|
||||||
output[2] = (int)input.elementC;
|
output[2] = (int)input.elementC;
|
||||||
|
|
|
@ -5,14 +5,14 @@ int get_dummy_id(int D);
|
||||||
kernel void bar(global int *A);
|
kernel void bar(global int *A);
|
||||||
|
|
||||||
kernel void foo(global int *A)
|
kernel void foo(global int *A)
|
||||||
// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef %A)
|
// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef align 4 %A)
|
||||||
{
|
{
|
||||||
int id = get_dummy_id(0);
|
int id = get_dummy_id(0);
|
||||||
// CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
|
// CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
|
||||||
A[id] = id;
|
A[id] = id;
|
||||||
bar(A);
|
bar(A);
|
||||||
// CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef %A)
|
// CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef align 4 %A)
|
||||||
}
|
}
|
||||||
|
|
||||||
// CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
|
// CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
|
||||||
// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef)
|
// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef align 4)
|
||||||
|
|
Loading…
Reference in New Issue