[gvn] Handle a corner case w/vectors of non-integral pointers
If we try to coerce a vector of non-integral pointers to a narrower type (either a narrower vector or a single pointer), we use inttoptr and violate the semantics of non-integral pointers. In theory we can handle many of these cases; we just need to use a different code idiom to convert without going through inttoptr and back. This shows up as wrong-code bugs and, in some cases, crashes due to failed asserts. Modeled after a change which has lived downstream for a couple of years, though completely rewritten to be more idiomatic.
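For illustration, a minimal sketch of the corner case (not taken from the patch; the function name @narrow_from_wide, the @use.v4 helper, and the "e-ni:4" datalayout are hypothetical stand-ins for any module where addrspace(4) is non-integral):

; Sketch only: a narrow load fully covered by a wider load at the same address.
target datalayout = "e-ni:4"

declare void @use.v4(<4 x i64 addrspace(4)*>)

define <2 x i64 addrspace(4)*> @narrow_from_wide(i8* %p) {
entry:
  %a = bitcast i8* %p to <4 x i64 addrspace(4)*>*
  %b = bitcast i8* %p to <2 x i64 addrspace(4)*>*
  ; The wide load makes the narrow load below redundant, so GVN wants to
  ; forward a value for it...
  %v4 = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* %a, align 32
  ; ...but materializing that value through inttoptr/ptrtoint is invalid,
  ; because non-integral pointers have no stable integer representation.
  %v2 = load <2 x i64 addrspace(4)*>, <2 x i64 addrspace(4)*>* %b, align 32
  call void @use.v4(<4 x i64 addrspace(4)*> %v4)
  ret <2 x i64 addrspace(4)*> %v2
}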
parent 2ef9d21e1a
commit f29645e7af
@@ -17,6 +17,7 @@ static bool isFirstClassAggregateOrScalableType(Type *Ty) {
 bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
                                      const DataLayout &DL) {
   Type *StoredTy = StoredVal->getType();
+
   if (StoredTy == LoadTy)
     return true;
 
@@ -46,6 +47,14 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
       return CI->isNullValue();
     return false;
   }
 
+  // The implementation below uses inttoptr for vectors of unequal size; we
+  // can't allow this for non integral pointers. We could teach it to extract
+  // exact subvectors if desired.
+  if (DL.isNonIntegralPointerType(StoredTy->getScalarType()) &&
+      StoreSize != DL.getTypeSizeInBits(LoadTy).getFixedSize())
+    return false;
+
   return true;
 }
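The new comment leaves room to handle these cases properly later. As a hedged sketch (assumed, not something the pass emits today) of the inttoptr-free idiom it alludes to, an exact subvector or single lane of the wide non-integral pointer value could be taken while staying in pointer types:

; Sketch only: hypothetical functions showing forwarding without inttoptr.
define <2 x i64 addrspace(4)*> @take_low_half(<4 x i64 addrspace(4)*> %v4) {
entry:
  ; exact subvector: lanes 0 and 1, no inttoptr/ptrtoint round-trip
  %low = shufflevector <4 x i64 addrspace(4)*> %v4, <4 x i64 addrspace(4)*> undef, <2 x i32> <i32 0, i32 1>
  ret <2 x i64 addrspace(4)*> %low
}

define i64 addrspace(4)* @take_first_lane(<4 x i64 addrspace(4)*> %v4) {
entry:
  ; single-pointer case: extract the first lane directly
  %p0 = extractelement <4 x i64 addrspace(4)*> %v4, i32 0
  ret i64 addrspace(4)* %p0
}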
@@ -223,14 +232,8 @@ int analyzeLoadFromClobberingStore(Type *LoadTy, Value *LoadPtr,
   if (isFirstClassAggregateOrScalableType(StoredVal->getType()))
     return -1;
 
-  // Don't coerce non-integral pointers to integers or vice versa.
-  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()) !=
-      DL.isNonIntegralPointerType(LoadTy->getScalarType())) {
-    // Allow casts of zero values to null as a special case
-    auto *CI = dyn_cast<Constant>(StoredVal);
-    if (!CI || !CI->isNullValue())
-      return -1;
-  }
+  if (!canCoerceMustAliasedValueToLoad(StoredVal, LoadTy, DL))
+    return -1;
 
   Value *StorePtr = DepSI->getPointerOperand();
   uint64_t StoreSize =
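The removed block's "casts of zero values to null" special case is not lost; it now lives in canCoerceMustAliasedValueToLoad (the CI->isNullValue() path shown above). A minimal sketch of the case it keeps working, assuming an "ni:4" datalayout and a hypothetical @forward_zero function: a stored integer zero may still be forwarded to a non-integral pointer load as null, since null is assumed to be the all-zero pattern:

; Sketch only: integral zero store forwarded to a non-integral pointer load.
define i8 addrspace(4)* @forward_zero(i64* %loc) {
entry:
  store i64 0, i64* %loc
  %loc.p = bitcast i64* %loc to i8 addrspace(4)**
  ; GVN may still forward the stored zero here as a null addrspace(4) pointer.
  %ref = load i8 addrspace(4)*, i8 addrspace(4)** %loc.p
  ret i8 addrspace(4)* %ref
}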
@@ -333,9 +336,7 @@ int analyzeLoadFromClobberingLoad(Type *LoadTy, Value *LoadPtr, LoadInst *DepLI,
   if (DepLI->getType()->isStructTy() || DepLI->getType()->isArrayTy())
     return -1;
 
-  // Don't coerce non-integral pointers to integers or vice versa.
-  if (DL.isNonIntegralPointerType(DepLI->getType()->getScalarType()) !=
-      DL.isNonIntegralPointerType(LoadTy->getScalarType()))
-    return -1;
+  if (!canCoerceMustAliasedValueToLoad(DepLI, LoadTy, DL))
+    return -1;
 
   Value *DepPtr = DepLI->getPointerOperand();
@@ -202,7 +202,7 @@ define i64 addrspace(4)* @neg_forward_memcopy2(i64 addrspace(4)* addrspace(4)* %
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i64 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
 ; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 8, i1 false)
-; CHECK-NEXT:    [[REF:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* [[LOC]]
+; CHECK-NEXT:    [[REF:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* [[LOC]], align 8
 ; CHECK-NEXT:    ret i64 addrspace(4)* [[REF]]
 ;
 entry:
@@ -219,7 +219,7 @@ define i8 addrspace(4)* @forward_memcopy(i8 addrspace(4)* addrspace(4)* %loc) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
 ; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 8, i1 false)
-; CHECK-NEXT:    [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
+; CHECK-NEXT:    [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* [[LOC]], align 8
 ; CHECK-NEXT:    ret i8 addrspace(4)* [[REF]]
 ;
 entry:
@@ -266,7 +266,7 @@ define <4 x i64 addrspace(4)*> @neg_forward_memcpy_vload2(<4 x i64 addrspace(4)*
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <4 x i64 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
 ; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 32, i1 false)
-; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* [[LOC]]
+; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* [[LOC]], align 32
 ; CHECK-NEXT:    ret <4 x i64 addrspace(4)*> [[REF]]
 ;
 entry:
@@ -282,7 +282,7 @@ define <4 x i64> @neg_forward_memcpy_vload3(<4 x i64> addrspace(4)* %loc) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <4 x i64> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
 ; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 32, i1 false)
-; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64>, <4 x i64> addrspace(4)* [[LOC]]
+; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64>, <4 x i64> addrspace(4)* [[LOC]], align 32
 ; CHECK-NEXT:    ret <4 x i64> [[REF]]
 ;
 entry:
@@ -386,3 +386,47 @@ entry:
   %ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc.off
   ret i8 addrspace(4)* %ref
 }
+
+define void @smaller_vector(i8* %p) {
+; CHECK-LABEL: @smaller_vector(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[P:%.*]] to <4 x i64 addrspace(4)*>*
+; CHECK-NEXT:    [[B:%.*]] = bitcast i8* [[P]] to <2 x i64 addrspace(4)*>*
+; CHECK-NEXT:    [[V4:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* [[A]], align 32
+; CHECK-NEXT:    [[V2:%.*]] = load <2 x i64 addrspace(4)*>, <2 x i64 addrspace(4)*>* [[B]], align 32
+; CHECK-NEXT:    call void @use.v2(<2 x i64 addrspace(4)*> [[V2]])
+; CHECK-NEXT:    call void @use.v4(<4 x i64 addrspace(4)*> [[V4]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = bitcast i8* %p to <4 x i64 addrspace(4)*>*
+  %b = bitcast i8* %p to <2 x i64 addrspace(4)*>*
+  %v4 = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* %a, align 32
+  %v2 = load <2 x i64 addrspace(4)*>, <2 x i64 addrspace(4)*>* %b, align 32
+  call void @use.v2(<2 x i64 addrspace(4)*> %v2)
+  call void @use.v4(<4 x i64 addrspace(4)*> %v4)
+  ret void
+}
+
+define i64 addrspace(4)* @vector_extract(i8* %p) {
+; CHECK-LABEL: @vector_extract(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = bitcast i8* [[P:%.*]] to <4 x i64 addrspace(4)*>*
+; CHECK-NEXT:    [[B:%.*]] = bitcast i8* [[P]] to i64 addrspace(4)**
+; CHECK-NEXT:    [[V4:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* [[A]], align 32
+; CHECK-NEXT:    [[RES:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)** [[B]], align 32
+; CHECK-NEXT:    call void @use.v4(<4 x i64 addrspace(4)*> [[V4]])
+; CHECK-NEXT:    ret i64 addrspace(4)* [[RES]]
+;
+entry:
+  %a = bitcast i8* %p to <4 x i64 addrspace(4)*>*
+  %b = bitcast i8* %p to i64 addrspace(4)**
+  %v4 = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*>* %a, align 32
+  %res = load i64 addrspace(4)*, i64 addrspace(4)** %b, align 32
+  call void @use.v4(<4 x i64 addrspace(4)*> %v4)
+  ret i64 addrspace(4)* %res
+}
+
+declare void @use.v2(<2 x i64 addrspace(4)*>)
+declare void @use.v4(<4 x i64 addrspace(4)*>)
+