diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 7ee4cdbb6ef0..ab1aa6cc7649 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2485,6 +2485,22 @@ void CodeGenModule::ConstructAttributeList(StringRef Name,
       }
     }
 
+    // From OpenCL spec v3.0.10 section 6.3.5 Alignment of Types:
+    // > For arguments to a __kernel function declared to be a pointer to a
+    // > data type, the OpenCL compiler can assume that the pointee is always
+    // > appropriately aligned as required by the data type.
+    if (TargetDecl && TargetDecl->hasAttr<OpenCLKernelAttr>() &&
+        ParamType->isPointerType()) {
+      QualType PTy = ParamType->getPointeeType();
+      // Incomplete pointees (e.g. void) and non-constant-size pointees get
+      // no align attribute.
+      if (!PTy->isIncompleteType() && PTy->isConstantSizeType()) {
+        llvm::Align Alignment =
+            getNaturalPointeeTypeAlignment(ParamType).getAsAlign();
+        Attrs.addAlignmentAttr(Alignment);
+      }
+    }
+
     switch (FI.getExtParameterInfo(ArgNo).getABI()) {
     case ParameterABI::Ordinary:
       break;
diff --git a/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
index 757cc0bd577d..db7270d1c4bc 100755
--- a/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-call-kernel.cl
@@ -1,6 +1,6 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -S -emit-llvm -o - %s | FileCheck %s
-// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly %out)
+// CHECK: define{{.*}} amdgpu_kernel void @test_call_kernel(i32 addrspace(1)* nocapture noundef writeonly align 4 %out)
 // CHECK: store i32 4, i32 addrspace(1)* %out, align 4
 
 kernel void test_kernel(global int *out)
diff --git a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
index fdbc0aaa6e26..f90c48ef0572 100644
--- a/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
+++ b/clang/test/CodeGenOpenCL/cl20-device-side-enqueue.cl
@@ -34,7 +34,7 @@ void callee(int id, __global int *out) {
   out[id] = id;
 }
 
-// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* %{{.*}}, i32 addrspace(1)* %b, i32 %i)
+// COMMON-LABEL: define{{.*}} spir_kernel void @device_side_enqueue(i32 addrspace(1)* align 4 %{{.*}}, i32 addrspace(1)* align 4 %b, i32 %i)
 kernel void device_side_enqueue(global int *a, global int *b, int i) {
   // COMMON: %default_queue = alloca %opencl.queue_t*
   queue_t default_queue;
diff --git a/clang/test/CodeGenOpenCL/kernel-param-alignment.cl b/clang/test/CodeGenOpenCL/kernel-param-alignment.cl
new file mode 100644
index 000000000000..862f0b62ca98
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/kernel-param-alignment.cl
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 %s -cl-std=CL1.2 -emit-llvm -triple x86_64-unknown-unknown -o - | FileCheck %s
+
+// Test that pointer arguments to kernels are assumed to be ABI aligned.
+
+struct __attribute__((packed, aligned(1))) packed {
+  int i32;
+};
+
+typedef __attribute__((ext_vector_type(4))) int int4;
+typedef __attribute__((ext_vector_type(2))) float float2;
+
+kernel void test(
+    global int *i32,
+    global long *i64,
+    global int4 *v4i32,
+    global float2 *v2f32,
+    global void *v,
+    global struct packed *p) {
+// CHECK-LABEL: spir_kernel void @test(
+// CHECK-SAME: i32* nocapture noundef align 4 %i32,
+// CHECK-SAME: i64* nocapture noundef align 8 %i64,
+// CHECK-SAME: <4 x i32>* nocapture noundef align 16 %v4i32,
+// CHECK-SAME: <2 x float>* nocapture noundef align 8 %v2f32,
+// CHECK-SAME: i8* nocapture noundef %v,
+// CHECK-SAME: %struct.packed* nocapture noundef align 1 %p)
+}
diff --git a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
index 5797cd75f58a..8c7592119cd6 100644
--- a/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
+++ b/clang/test/CodeGenOpenCL/kernels-have-spir-cc-by-default.cl
@@ -28,7 +28,7 @@ kernel void test_single(int_single input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define{{.*}} amdgpu_kernel void @test_single
 // CHECK: struct.int_single* nocapture {{.*}} byval(%struct.int_single)
-// CHECK: i32* nocapture noundef writeonly %output
+// CHECK: i32* nocapture noundef writeonly align 4 %output
   output[0] = input.a;
 }
 
@@ -36,7 +36,7 @@ kernel void test_pair(int_pair input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define{{.*}} amdgpu_kernel void @test_pair
 // CHECK: struct.int_pair* nocapture {{.*}} byval(%struct.int_pair)
-// CHECK: i32* nocapture noundef writeonly %output
+// CHECK: i32* nocapture noundef writeonly align 4 %output
   output[0] = (int)input.a;
   output[1] = (int)input.b;
 }
@@ -45,7 +45,7 @@ kernel void test_kernel(test_struct input, global int* output) {
 // CHECK: spir_kernel
 // AMDGCN: define{{.*}} amdgpu_kernel void @test_kernel
 // CHECK: struct.test_struct* nocapture {{.*}} byval(%struct.test_struct)
-// CHECK: i32* nocapture noundef writeonly %output
+// CHECK: i32* nocapture noundef writeonly align 4 %output
   output[0] = input.elementA;
   output[1] = input.elementB;
   output[2] = (int)input.elementC;
diff --git a/clang/test/CodeGenOpenCL/spir-calling-conv.cl b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
index 9bd70f8cedb2..1d7645ad0fe1 100644
--- a/clang/test/CodeGenOpenCL/spir-calling-conv.cl
+++ b/clang/test/CodeGenOpenCL/spir-calling-conv.cl
@@ -5,14 +5,14 @@ int get_dummy_id(int D);
 kernel void bar(global int *A);
 
 kernel void foo(global int *A)
-// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef %A)
+// CHECK: define{{.*}} spir_kernel void @foo(i32 addrspace(1)* noundef align 4 %A)
 {
   int id = get_dummy_id(0);
   // CHECK: %{{[a-z0-9_]+}} = tail call spir_func i32 @get_dummy_id(i32 noundef 0)
   A[id] = id;
   bar(A);
-  // CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef %A)
+  // CHECK: tail call spir_kernel void @bar(i32 addrspace(1)* noundef align 4 %A)
 }
 
 // CHECK: declare spir_func i32 @get_dummy_id(i32 noundef)
-// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef)
+// CHECK: declare spir_kernel void @bar(i32 addrspace(1)* noundef align 4)