; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck %s

@gp = global i32* null, align 8

declare noalias i8* @malloc(i64)
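
; Illustrative sketch (added commentary, not part of the original test file):
; the two trivial tests below correspond roughly to C code such as
;   int *m = (int *)malloc(4);
;   return m == gp;   /* or m != gp in the second test */
; where the fresh, never-captured allocation cannot be the pointer loaded from
; @gp, so InstCombine folds the compare to a constant.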

define i1 @compare_global_trivialeq() {
; CHECK-LABEL: @compare_global_trivialeq(
; CHECK-NEXT: ret i1 false
;
  %m = call i8* @malloc(i64 4)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8
  %cmp = icmp eq i32* %bc, %lgp
  ret i1 %cmp
}

define i1 @compare_global_trivialne() {
; CHECK-LABEL: @compare_global_trivialne(
; CHECK-NEXT: ret i1 true
;
  %m = call i8* @malloc(i64 4)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8
  %cmp = icmp ne i32* %bc, %lgp
  ret i1 %cmp
}

; Although %m is marked nocapture in the deopt operand of the call to function f,
; we cannot remove the alloc site: the call to malloc.
; The comparison should fold to false irrespective of whether the call to malloc
; can be elided or not.
declare void @f()

define i1 @compare_and_call_with_deopt() {
; CHECK-LABEL: @compare_and_call_with_deopt(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(24) i8* @malloc(i64 24)
; CHECK-NEXT: tail call void @f() [ "deopt"(i8* [[M]]) ]
; CHECK-NEXT: ret i1 false
;
  %m = call i8* @malloc(i64 24)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
  %cmp = icmp eq i32* %lgp, %bc
  tail call void @f() [ "deopt"(i8* %m) ]
  ret i1 %cmp
}

; Same function as above with a deopt operand in the call to f, but the comparison is NE.
define i1 @compare_ne_and_call_with_deopt() {
; CHECK-LABEL: @compare_ne_and_call_with_deopt(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(24) i8* @malloc(i64 24)
; CHECK-NEXT: tail call void @f() [ "deopt"(i8* [[M]]) ]
; CHECK-NEXT: ret i1 true
;
  %m = call i8* @malloc(i64 24)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
  %cmp = icmp ne i32* %lgp, %bc
  tail call void @f() [ "deopt"(i8* %m) ]
  ret i1 %cmp
}

; Same function as above, but the global is not marked nonnull, so we cannot fold the comparison.
define i1 @compare_ne_global_maybe_null() {
; CHECK-LABEL: @compare_ne_global_maybe_null(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(24) i8* @malloc(i64 24)
; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[M]] to i32*
; CHECK-NEXT: [[LGP:%.*]] = load i32*, i32** @gp, align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32* [[LGP]], [[BC]]
; CHECK-NEXT: tail call void @f() [ "deopt"(i8* [[M]]) ]
; CHECK-NEXT: ret i1 [[CMP]]
;
  %m = call i8* @malloc(i64 24)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp
  %cmp = icmp ne i32* %lgp, %bc
  tail call void @f() [ "deopt"(i8* %m) ]
  ret i1 %cmp
}

; FIXME: The comparison should fold to false since %m escapes (call to function escape)
; after the comparison.
declare void @escape(i8*)

define i1 @compare_and_call_after() {
; CHECK-LABEL: @compare_and_call_after(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(24) i8* @malloc(i64 24)
; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[M]] to i32*
; CHECK-NEXT: [[LGP:%.*]] = load i32*, i32** @gp, align 8, !nonnull !0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[LGP]], [[BC]]
; CHECK-NEXT: br i1 [[CMP]], label [[ESCAPE_CALL:%.*]], label [[JUST_RETURN:%.*]]
; CHECK: escape_call:
; CHECK-NEXT: call void @escape(i8* [[M]])
; CHECK-NEXT: ret i1 true
; CHECK: just_return:
; CHECK-NEXT: ret i1 [[CMP]]
;
  %m = call i8* @malloc(i64 24)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
  %cmp = icmp eq i32* %bc, %lgp
  br i1 %cmp, label %escape_call, label %just_return

escape_call:
  call void @escape(i8* %m)
  ret i1 true

just_return:
  ret i1 %cmp
}

define i1 @compare_distinct_mallocs() {
; CHECK-LABEL: @compare_distinct_mallocs(
; CHECK-NEXT: ret i1 false
;
  %m = call i8* @malloc(i64 4)
  %n = call i8* @malloc(i64 4)
  %cmp = icmp eq i8* %m, %n
  ret i1 %cmp
}

; The compare is folded to true since the compare folding looks through bitcasts.
; The call to malloc and the bitcast instructions are then elided since there are
; no uses of the malloc.
define i1 @compare_samepointer_under_bitcast() {
; CHECK-LABEL: @compare_samepointer_under_bitcast(
; CHECK-NEXT: ret i1 true
;
  %m = call i8* @malloc(i64 4)
  %bc = bitcast i8* %m to i32*
  %bcback = bitcast i32* %bc to i8*
  %cmp = icmp eq i8* %m, %bcback
  ret i1 %cmp
}

; The compare is folded to true since the compare folding looks through bitcasts.
; The malloc call for %m cannot be elided since it is used in the call to function f.
define i1 @compare_samepointer_escaped() {
; CHECK-LABEL: @compare_samepointer_escaped(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: call void @f() [ "deopt"(i8* [[M]]) ]
; CHECK-NEXT: ret i1 true
;
  %m = call i8* @malloc(i64 4)
  %bc = bitcast i8* %m to i32*
  %bcback = bitcast i32* %bc to i8*
  %cmp = icmp eq i8* %m, %bcback
  call void @f() [ "deopt"(i8* %m) ]
  ret i1 %cmp
}

; Technically, we can fold the %cmp2 comparison even though %m escapes through
; the ret statement, since `ret` terminates the function and we cannot reach the
; cmp from the ret.
; FIXME: Folding this %cmp2 when %m escapes through ret could be an issue with
; cross-threading data dependencies since we do not make the distinction between
; atomic and non-atomic loads in capture tracking.
define i8* @compare_ret_escape(i8* %c) {
; CHECK-LABEL: @compare_ret_escape(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[N:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[N]], [[C:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[RETST:%.*]], label [[CHK:%.*]]
; CHECK: retst:
; CHECK-NEXT: ret i8* [[M]]
; CHECK: chk:
; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[M]] to i32*
; CHECK-NEXT: [[LGP:%.*]] = load i32*, i32** @gp, align 8, !nonnull !0
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32* [[LGP]], [[BC]]
; CHECK-NEXT: br i1 [[CMP2]], label [[RETST]], label [[CHK2:%.*]]
; CHECK: chk2:
; CHECK-NEXT: ret i8* [[N]]
;
  %m = call i8* @malloc(i64 4)
  %n = call i8* @malloc(i64 4)
  %cmp = icmp eq i8* %n, %c
  br i1 %cmp, label %retst, label %chk

retst:
  ret i8* %m

chk:
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
  %cmp2 = icmp eq i32* %bc, %lgp
  br i1 %cmp2, label %retst, label %chk2

chk2:
  ret i8* %n
}

; The malloc call for %m cannot be elided since it is used in the call to function f.
; However, the cmp can be folded to true since %n doesn't escape and %m, %n are
; distinct allocations.
define i1 @compare_distinct_pointer_escape() {
; CHECK-LABEL: @compare_distinct_pointer_escape(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: tail call void @f() [ "deopt"(i8* [[M]]) ]
; CHECK-NEXT: ret i1 true
;
  %m = call i8* @malloc(i64 4)
  %n = call i8* @malloc(i64 4)
  tail call void @f() [ "deopt"(i8* %m) ]
  %cmp = icmp ne i8* %m, %n
  ret i1 %cmp
}

; The next block of tests demonstrates a very subtle correctness requirement.
; We can generally assume any *single* heap layout we choose for the result of
; a malloc call, but we cannot simultaneously assume two different ones. As a
; result, we must make sure that we only fold conditions if we can ensure that
; we fold *all* potentially address-capturing compares the same way. This is
; the same point that applies to allocas, applied to noalias/malloc.
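
; Illustrative C-level sketch of the hazard described above (added commentary,
; not part of the original test; hidden_inttoptr is the helper declared below):
;   char *m = malloc(4);
;   bool a = (m == (char *)2048);        /* forged pointer, visible to instcombine */
;   bool b = (m == hidden_inttoptr());   /* same kind of forged pointer, hidden    */
; Folding only 'a' to false while 'b' is left to observe the real placement of m
; would amount to assuming two different heap layouts for the same allocation.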

; These two functions represent either a) forging a pointer via inttoptr or
; b) indexing off an adjacent allocation. In either case, the operation is
; obscured by an uninlined helper and not visible to instcombine.
declare i8* @hidden_inttoptr()
declare i8* @hidden_offset(i8* %other)

; FIXME: Missed opportunity
define i1 @ptrtoint_single_cmp() {
; CHECK-LABEL: @ptrtoint_single_cmp(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[M]], inttoptr (i64 2048 to i8*)
; CHECK-NEXT: ret i1 [[CMP]]
;
  %m = call i8* @malloc(i64 4)
  %rhs = inttoptr i64 2048 to i8*
  %cmp = icmp eq i8* %m, %rhs
  ret i1 %cmp
}

define i1 @offset_single_cmp() {
; CHECK-LABEL: @offset_single_cmp(
; CHECK-NEXT: ret i1 false
;
  %m = call i8* @malloc(i64 4)
  %n = call i8* @malloc(i64 4)
  %rhs = getelementptr i8, i8* %n, i32 4
  %cmp = icmp eq i8* %m, %rhs
  ret i1 %cmp
}

declare void @witness(i1, i1)

define void @neg_consistent_fold1() {
; CHECK-LABEL: @neg_consistent_fold1(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[RHS2:%.*]] = call i8* @hidden_inttoptr()
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8* [[M]], inttoptr (i64 2048 to i8*)
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8* [[M]], [[RHS2]]
; CHECK-NEXT: call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT: ret void
;
  %m = call i8* @malloc(i64 4)
  %rhs = inttoptr i64 2048 to i8*
  %rhs2 = call i8* @hidden_inttoptr()
  %cmp1 = icmp eq i8* %m, %rhs
  %cmp2 = icmp eq i8* %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

define void @neg_consistent_fold2() {
; CHECK-LABEL: @neg_consistent_fold2(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[N:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[RHS:%.*]] = getelementptr i8, i8* [[N]], i64 4
; CHECK-NEXT: [[RHS2:%.*]] = call i8* @hidden_offset(i8* [[N]])
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8* [[M]], [[RHS]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8* [[M]], [[RHS2]]
; CHECK-NEXT: call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT: ret void
;
  %m = call i8* @malloc(i64 4)
  %n = call i8* @malloc(i64 4)
  %rhs = getelementptr i8, i8* %n, i32 4
  %rhs2 = call i8* @hidden_offset(i8* %n)
  %cmp1 = icmp eq i8* %m, %rhs
  %cmp2 = icmp eq i8* %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

define void @neg_consistent_fold3() {
; CHECK-LABEL: @neg_consistent_fold3(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[M]] to i32*
; CHECK-NEXT: [[LGP:%.*]] = load i32*, i32** @gp, align 8
; CHECK-NEXT: [[RHS2:%.*]] = call i8* @hidden_inttoptr()
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32* [[LGP]], [[BC]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8* [[M]], [[RHS2]]
; CHECK-NEXT: call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT: ret void
;
  %m = call i8* @malloc(i64 4)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8
  %rhs2 = call i8* @hidden_inttoptr()
  %cmp1 = icmp eq i32* %bc, %lgp
  %cmp2 = icmp eq i8* %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

; FIXME: This appears correct, but the current implementation relies
; on visiting both cmps in the same pass. We may have a simplification order
; under which one is missed, and that would be a bug.
define void @neg_consistent_fold4() {
; CHECK-LABEL: @neg_consistent_fold4(
; CHECK-NEXT: call void @witness(i1 false, i1 false)
; CHECK-NEXT: ret void
;
  %m = call i8* @malloc(i64 4)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8
  %cmp1 = icmp eq i32* %bc, %lgp
  %cmp2 = icmp eq i32* %bc, %lgp
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

declare void @unknown(i8*)

; Points out that a nocapture call can't cause a consistent result issue
; as it is (by assumption) not able to contain a comparison which might
; capture the address.
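
; Illustrative C shape for the tests below (added commentary, not part of the
; original test; 'unknown' stands for an external call that cannot capture m):
;   char *m = malloc(4);
;   unknown(m);       /* nocapture: the callee cannot stash or compare m       */
;   return m == rhs;  /* so folding this compare cannot become inconsistent    */
;                     /* with a compare hidden inside the call                 */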

define i1 @consistent_nocapture_inttoptr() {
; CHECK-LABEL: @consistent_nocapture_inttoptr(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: call void @unknown(i8* nocapture [[M]])
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[M]], inttoptr (i64 2048 to i8*)
; CHECK-NEXT: ret i1 [[CMP]]
;
  %m = call i8* @malloc(i64 4)
  call void @unknown(i8* nocapture %m)
  %rhs = inttoptr i64 2048 to i8*
  %cmp = icmp eq i8* %m, %rhs
  ret i1 %cmp
}

define i1 @consistent_nocapture_offset() {
; CHECK-LABEL: @consistent_nocapture_offset(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: call void @unknown(i8* nocapture [[M]])
; CHECK-NEXT: ret i1 false
;
  %m = call i8* @malloc(i64 4)
  call void @unknown(i8* nocapture %m)
  %n = call i8* @malloc(i64 4)
  %rhs = getelementptr i8, i8* %n, i32 4
  %cmp = icmp eq i8* %m, %rhs
  ret i1 %cmp
}

define i1 @consistent_nocapture_through_global() {
; CHECK-LABEL: @consistent_nocapture_through_global(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: call void @unknown(i8* nocapture [[M]])
; CHECK-NEXT: ret i1 false
;
  %m = call i8* @malloc(i64 4)
  call void @unknown(i8* nocapture %m)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
  %cmp = icmp eq i32* %bc, %lgp
  ret i1 %cmp
}

; End consistent heap layout tests

; We can fold this by assuming a single heap layout
define i1 @two_nonnull_mallocs() {
; CHECK-LABEL: @two_nonnull_mallocs(
; CHECK-NEXT: ret i1 false
;
  %m = call nonnull i8* @malloc(i64 4)
  %n = call nonnull i8* @malloc(i64 4)
  %cmp = icmp eq i8* %m, %n
  ret i1 %cmp
}

; The address of %n is captured, but %m can be arranged to make
; the comparison non-equal.
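; Roughly the C shape being modeled (added commentary, not part of the original
; test):
;   char *m = malloc(4);   /* never captured                                */
;   char *n = malloc(4);   /* escapes via unknown(n)                        */
;   unknown(n);
;   return m == n;         /* m's placement can be chosen so this is false  */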
define i1 @two_nonnull_mallocs2() {
; CHECK-LABEL: @two_nonnull_mallocs2(
; CHECK-NEXT: [[N:%.*]] = call nonnull dereferenceable(4) i8* @malloc(i64 4)
; CHECK-NEXT: call void @unknown(i8* nonnull [[N]])
; CHECK-NEXT: ret i1 false
;
  %m = call nonnull i8* @malloc(i64 4)
  %n = call nonnull i8* @malloc(i64 4)
  call void @unknown(i8* %n)
  %cmp = icmp eq i8* %m, %n
  ret i1 %cmp
}

; TODO: We can fold this, but don't with the current scheme.
define i1 @two_nonnull_mallocs_hidden() {
; CHECK-LABEL: @two_nonnull_mallocs_hidden(
; CHECK-NEXT: [[M:%.*]] = call nonnull dereferenceable(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[N:%.*]] = call nonnull dereferenceable(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, i8* [[M]], i64 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8* [[N]], i64 2
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[GEP1]], [[GEP2]]
; CHECK-NEXT: ret i1 [[CMP]]
;
  %m = call nonnull i8* @malloc(i64 4)
  %n = call nonnull i8* @malloc(i64 4)
  %gep1 = getelementptr i8, i8* %m, i32 1
  %gep2 = getelementptr i8, i8* %n, i32 2
  %cmp = icmp eq i8* %gep1, %gep2
  ret i1 %cmp
}

!0 = !{}