forked from OSchip/llvm-project

[GVN] add early exit to ConstantFoldLoadThroughBitcast [NFC]

This also adds some additional test coverage to ensure later commits don't
introduce regressions.

Differential Revision: https://reviews.llvm.org/D59730

This commit is contained in:
parent: fefe6a6642
commit: 19f01a4847
@@ -333,10 +333,15 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy,
                                                const DataLayout &DL) {
   do {
     Type *SrcTy = C->getType();
+    uint64_t DestSize = DL.getTypeSizeInBits(DestTy);
+    uint64_t SrcSize = DL.getTypeSizeInBits(SrcTy);
+    if (SrcSize < DestSize)
+      return nullptr;
 
     // If the type sizes are the same and a cast is legal, just directly
     // cast the constant.
-    if (DL.getTypeSizeInBits(DestTy) == DL.getTypeSizeInBits(SrcTy)) {
+    if (SrcSize == DestSize) {
       // But be careful not to coerce non-integral pointers illegally.
       Instruction::CastOps Cast = Instruction::BitCast;
       // If we are going from a pointer to int or vice versa, we spell the cast
       // differently.

@@ -169,7 +169,14 @@ define i8 addrspace(4)* @forward_store_zero2(i8 addrspace(4)* addrspace(4)* %loc
   ret i8 addrspace(4)* %ref
 }
 
 
 @NonZeroConstant = constant <4 x i64> <i64 3, i64 3, i64 3, i64 3>
+@NonZeroConstant2 = constant <4 x i64 addrspace(4)*> <
+  i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3),
+  i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3),
+  i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3),
+  i64 addrspace(4)* getelementptr (i64, i64 addrspace(4)* null, i32 3)>
 @ZeroConstant = constant <4 x i64> zeroinitializer
 
 
@@ -190,6 +197,39 @@ entry:
   ret i8 addrspace(4)* %ref
 }
 
+define i64 addrspace(4)* @neg_forward_memcopy2(i64 addrspace(4)* addrspace(4)* %loc) {
+; CHECK-LABEL: @neg_forward_memcopy2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i64 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 8, i1 false)
+; CHECK-NEXT:    [[REF:%.*]] = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* [[LOC]]
+; CHECK-NEXT:    ret i64 addrspace(4)* [[REF]]
+;
+entry:
+  %loc.bc = bitcast i64 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64>* @NonZeroConstant to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
+  %ref = load i64 addrspace(4)*, i64 addrspace(4)* addrspace(4)* %loc
+  ret i64 addrspace(4)* %ref
+}
+
+; TODO: missed optimization
+define i8 addrspace(4)* @forward_memcopy(i8 addrspace(4)* addrspace(4)* %loc) {
+; CHECK-LABEL: @forward_memcopy(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast i8 addrspace(4)* addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 8, i1 false)
+; CHECK-NEXT:    [[REF:%.*]] = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
+; CHECK-NEXT:    ret i8 addrspace(4)* [[REF]]
+;
+entry:
+  %loc.bc = bitcast i8 addrspace(4)* addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 8, i1 false)
+  %ref = load i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %loc
+  ret i8 addrspace(4)* %ref
+}
+
 define <1 x i8 addrspace(4)*> @neg_forward_memcpy_vload(<1 x i8 addrspace(4)*> addrspace(4)* %loc) {
 ; CHECK-LABEL: @neg_forward_memcpy_vload(
 ; CHECK-NEXT:  entry:
@@ -206,6 +246,37 @@ entry:
   ret <1 x i8 addrspace(4)*> %ref
 }
 
+define <4 x i64 addrspace(4)*> @neg_forward_memcpy_vload2(<4 x i64 addrspace(4)*> addrspace(4)* %loc) {
+; CHECK-LABEL: @neg_forward_memcpy_vload2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <4 x i64 addrspace(4)*> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64>* @NonZeroConstant to i8*), i64 32, i1 false)
+; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* [[LOC]]
+; CHECK-NEXT:    ret <4 x i64 addrspace(4)*> [[REF]]
+;
+entry:
+  %loc.bc = bitcast <4 x i64 addrspace(4)*> addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64>* @NonZeroConstant to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false)
+  %ref = load <4 x i64 addrspace(4)*>, <4 x i64 addrspace(4)*> addrspace(4)* %loc
+  ret <4 x i64 addrspace(4)*> %ref
+}
+
+define <4 x i64> @neg_forward_memcpy_vload3(<4 x i64> addrspace(4)* %loc) {
+; CHECK-LABEL: @neg_forward_memcpy_vload3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[LOC_BC:%.*]] = bitcast <4 x i64> addrspace(4)* [[LOC:%.*]] to i8 addrspace(4)*
+; CHECK-NEXT:    call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 [[LOC_BC]], i8* bitcast (<4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*), i64 32, i1 false)
+; CHECK-NEXT:    [[REF:%.*]] = load <4 x i64>, <4 x i64> addrspace(4)* [[LOC]]
+; CHECK-NEXT:    ret <4 x i64> [[REF]]
+;
+entry:
+  %loc.bc = bitcast <4 x i64> addrspace(4)* %loc to i8 addrspace(4)*
+  %src.bc = bitcast <4 x i64 addrspace(4)*>* @NonZeroConstant2 to i8*
+  call void @llvm.memcpy.p4i8.p0i8.i64(i8 addrspace(4)* align 4 %loc.bc, i8* %src.bc, i64 32, i1 false)
+  %ref = load <4 x i64>, <4 x i64> addrspace(4)* %loc
+  ret <4 x i64> %ref
+}
+
 ; Can forward since we can do so w/o breaking types
 ; TODO: missed optimization

@@ -65,7 +65,7 @@ define linkonce_odr void @_ZN1SC2Ev(%struct.S*) unnamed_addr align 2 {
 }
 
 define internal %struct.Foo* @_ZL3foov() {
-  ret %struct.Foo* null
+  ret %struct.Foo* getelementptr (%struct.Foo, %struct.Foo *null, i32 1)
 }
 
 define linkonce_odr void @_ZN1QC2Ev(%struct.Q*) unnamed_addr align 2 {

@@ -73,7 +73,7 @@ define linkonce_odr void @_ZN1QC2Ev(%struct.Q*) unnamed_addr align 2 {
   store %struct.Q* %0, %struct.Q** %2, align 8
   %3 = load %struct.Q*, %struct.Q** %2, align 8
   %4 = getelementptr inbounds %struct.Q, %struct.Q* %3, i32 0, i32 0
-  %5 = call i32 bitcast (i32 (i32)* @_ZL3baz3Foo to i32 (%struct.Foo*)*)(%struct.Foo* null)
+  %5 = call i32 bitcast (i32 (i32)* @_ZL3baz3Foo to i32 (%struct.Foo*)*)(%struct.Foo* getelementptr (%struct.Foo, %struct.Foo *null, i32 1))
   store i32 %5, i32* %4, align 4
   ret void
 }

Loading…
Reference in New Issue