diff --git a/llvm/lib/Transforms/Utils/VNCoercion.cpp b/llvm/lib/Transforms/Utils/VNCoercion.cpp
index 1939c0e3b504..11b42eca4960 100644
--- a/llvm/lib/Transforms/Utils/VNCoercion.cpp
+++ b/llvm/lib/Transforms/Utils/VNCoercion.cpp
@@ -17,6 +17,7 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) {
 bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
                                      const DataLayout &DL) {
   Type *StoredTy = StoredVal->getType();
+
   if (StoredTy == LoadTy)
     return true;
 
@@ -46,6 +47,14 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
       return CI->isNullValue();
     return false;
   }
+
+
+  // The implementation below uses inttoptr for vectors of unequal size; we
+  // can't allow this for non-integral pointers. We could teach it to extract
+  // exact subvectors if desired.
+  if (DL.isNonIntegralPointerType(StoredTy->getScalarType()) &&
+      StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedSize())
+    return false;
 
   return true;
 }
@@ -223,14 +232,8 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
   if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
     return -1;
 
-  // Don't coerce non-integral pointers to integers or vice versa.
-  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
-      DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
-    // Allow casts of zero values to null as a special case
-    auto *CI = dyn_cast<Constant>(StoredVal);
-    if (!CI || !CI->isNullValue())
-      return -1;
-  }
+  if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
+    return -1;
 
   Value *StorePtr = DepSI->getPointerOperand();
   uint64_t StoreSize =
@@ -333,9 +336,7 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
   if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
     return -1;
 
-  // Don't coerce non-integral pointers to integers or vice versa.
-  if (DL.isNonIntegralPointerType(DepLI->getType()->getScalarType()) !=
-      DL.isNonIntegralPointerType(LoadTy->getScalarType()))
+  if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
     return -1;
 
   Value *DepPtr = DepLI->getPointerOperand();
diff --git a/llvm/test/Transforms/GVN/non-integral-pointers.ll b/llvm/test/Transforms/GVN/non-integral-pointers.ll
index a017dda926e3..872b6648084e 100644
--- a/llvm/test/Transforms/GVN/non-integral-pointers.ll
+++ b/llvm/test/Transforms/GVN/non-integral-pointers.ll
@@ -202,7 +202,7 @@ define i64 addrspace(4)* @neg_forward_memcopy2(i64 addrspace(4)* addrspace(4)* %
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i64 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
 ; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 8, i1 false)
-; CHECK-NEXT:    [[REF:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* [[LOC]]
+; CHECK-NEXT:    [[REF:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* [[LOC]], align 8
 ; CHECK-NEXT:    ret i64 addrspace(4)* [[REF]]
 ;
 entry:
@@ -219,7 +219,7 @@ define i8 addrspace(4)* @forward_memcopy(i8 addrspace(4)* addrspace(4)* %loc) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
 ; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 8, i1 false)
-; CHECK-NEXT:    [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
+; CHECK-NEXT:    [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]], align 8
 ; CHECK-NEXT:    ret i8 addrspace(4)* [[REF]]
 ;
 entry:
@@ -266,7 +266,7 @@ define <4 x i64 addrspace(4)*> @neg_forward_memcpy_vload2(<4 x i64 addrspace(4)*
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <4 x i64 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
 ; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 32, i1 false)
-; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* [[LOC]]
+; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* [[LOC]], align 32
 ; CHECK-NEXT:    ret <4 x i64 addrspace(4)*> [[REF]]
 ;
 entry:
@@ -282,7 +282,7 @@ define <4 x i64> @neg_forward_memcpy_vload3(<4 x i64> addrspace(4)* %loc) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <4 x i64> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
 ; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 32, i1 false)
-; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64>, <4 x i64> addrspace(4)* [[LOC]]
+; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64>, <4 x i64> addrspace(4)* [[LOC]], align 32
 ; CHECK-NEXT:    ret <4 x i64> [[REF]]
 ;
 entry:
@@ -386,3 +386,47 @@ entry:
   %ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc.off
   ret i8 addrspace(4)* %ref
 }
+
+
+define void @smaller_vector(i8* %p) {
+; CHECK-LABEL: @smaller_vector(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[P:%.*]] to <4 x i64 addrspace(4)*>*
+; CHECK-NEXT:    [[B:%.*]] = bitcast i8* [[P]] to <2 x i64 addrspace(4)*>*
+; CHECK-NEXT:    [[V4:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* [[A]], align 32
+; CHECK-NEXT:    [[V2:%.*]] = load <2 x i64 addrspace(4)*>, <2 x i64 addrspace(4)*>* [[B]], align 32
+; CHECK-NEXT:    call void @use.v2(<2 x i64 addrspace(4)*> [[V2]])
+; CHECK-NEXT:    call void @use.v4(<4 x i64 addrspace(4)*> [[V4]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = bitcast i8* %p to <4 x i64 addrspace(4)*>*
+  %b = bitcast i8* %p to <2 x i64 addrspace(4)*>*
+  %v4 = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* %a, align 32
+  %v2 = load <2 x i64 addrspace(4)*>, <2 x i64 addrspace(4)*>* %b, align 32
+  call void @use.v2(<2 x i64 addrspace(4)*> %v2)
+  call void @use.v4(<4 x i64 addrspace(4)*> %v4)
+  ret void
+}
+
+define i64 addrspace(4)* @vector_extract(i8* %p) {
+; CHECK-LABEL: @vector_extract(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[P:%.*]] to <4 x i64 addrspace(4)*>*
+; CHECK-NEXT:    [[B:%.*]] = bitcast i8* [[P]] to i64 addrspace(4)**
+; CHECK-NEXT:    [[V4:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* [[A]], align 32
+; CHECK-NEXT:    [[RES:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)** [[B]], align 32
+; CHECK-NEXT:    call void @use.v4(<4 x i64 addrspace(4)*> [[V4]])
+; CHECK-NEXT:    ret i64 addrspace(4)* [[RES]]
+;
+entry:
+  %a = bitcast i8* %p to <4 x i64 addrspace(4)*>*
+  %b = bitcast i8* %p to i64 addrspace(4)**
+  %v4 = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* %a, align 32
+  %res = load i64 addrspace(4)*, i64 addrspace(4)** %b, align 32
+  call void @use.v4(<4 x i64 addrspace(4)*> %v4)
+  ret i64 addrspace(4)* %res
+}
+
+declare void @use.v2(<2 x i64 addrspace(4)*>)
+declare void @use.v4(<4 x i64 addrspace(4)*>)