[CUDA][HIP] Fix store of vtbl in ctor

vtbl itself is in default global address space. When clang emits
ctor, it gets a pointer to the vtbl field based on the this pointer,
then stores vtbl to the pointer.

Since this pointer can point to any address space (e.g. an object
created in stack), this pointer points to default address space, therefore
the pointer to vtbl field in this object should also be in default
address space.

Currently, clang incorrectly casts the pointer to vtbl field in this object
to global address space. This caused assertions in backend.

This patch fixes that by removing the incorrect addr space cast.

Reviewed by: Artem Belevich

Differential Revision: https://reviews.llvm.org/D103835
This commit is contained in:
Yaxun (Sam) Liu 2021-06-07 15:01:25 -04:00
parent 7f6c878a2c
commit 054cc3b1b4
2 changed files with 20 additions and 1 deletions

View File

@ -2518,8 +2518,10 @@ void CodeGenFunction::InitializeVTablePointer(const VPtr &Vptr) {
llvm::FunctionType::get(CGM.Int32Ty, /*isVarArg=*/true)
->getPointerTo(ProgAS)
->getPointerTo(GlobalsAS);
// vtable field is is derived from `this` pointer, therefore it should be in
// default address space.
VTableField = Builder.CreatePointerBitCastOrAddrSpaceCast(
VTableField, VTablePtrTy->getPointerTo(GlobalsAS));
VTableField, VTablePtrTy->getPointerTo());
VTableAddressPoint = Builder.CreatePointerBitCastOrAddrSpaceCast(
VTableAddressPoint, VTablePtrTy);

View File

@ -0,0 +1,17 @@
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -target-cpu gfx906 \
// RUN: -emit-llvm -o - %s | FileCheck %s
#include "Inputs/cuda.h"
// CHECK-LABEL: define {{.*}}@_ZN1AC2Ev(%struct.A* nonnull align 8 dereferenceable(8) %this)
// CHECK: store %struct.A* %this, %struct.A** %this.addr.ascast
// CHECK: %this1 = load %struct.A*, %struct.A** %this.addr.ascast
// CHECK: %[[VTFIELD:.*]] = bitcast %struct.A* %this1 to i32 (...)* addrspace(1)**
// CHECK: store i32 (...)* addrspace(1)* bitcast{{.*}} @_ZTV1A{{.*}}, i32 (...)* addrspace(1)** %[[VTFIELD]]
struct A {
__device__ virtual void vf() {}
};
__global__ void kern() {
A a;
}