; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck %s

@gp = global i32* null, align 8

declare noalias i8* @malloc(i64)
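
; Illustrative sketch (added commentary, not part of the original test file):
; the two trivial tests below correspond roughly to C code such as
;   int *m = (int *)malloc(4);
;   return m == gp;   /* or m != gp in the second test */
; where the fresh, never-captured allocation cannot be the pointer loaded from
; @gp, so InstCombine folds the compare to a constant.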

define i1 @compare_global_trivialeq() {
; CHECK-LABEL: @compare_global_trivialeq(
; CHECK-NEXT: ret i1 false
;
  %m = call i8* @malloc(i64 4)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8
  %cmp = icmp eq i32* %bc, %lgp
  ret i1 %cmp
}

define i1 @compare_global_trivialne() {
; CHECK-LABEL: @compare_global_trivialne(
; CHECK-NEXT: ret i1 true
;
  %m = call i8* @malloc(i64 4)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8
  %cmp = icmp ne i32* %bc, %lgp
  ret i1 %cmp
}

; Although %m is marked nocapture in the deopt operand of the call to function f,
; we cannot remove the alloc site: the call to malloc.
; The comparison should fold to false irrespective of whether the call to malloc
; can be elided or not.
declare void @f()

define i1 @compare_and_call_with_deopt() {
; CHECK-LABEL: @compare_and_call_with_deopt(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(24) i8* @malloc(i64 24)
; CHECK-NEXT: tail call void @f() [ "deopt"(i8* [[M]]) ]
; CHECK-NEXT: ret i1 false
;
  %m = call i8* @malloc(i64 24)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
  %cmp = icmp eq i32* %lgp, %bc
  tail call void @f() [ "deopt"(i8* %m) ]
  ret i1 %cmp
}

; Same function as above with a deopt operand in the call to f, but the comparison is NE.
define i1 @compare_ne_and_call_with_deopt() {
; CHECK-LABEL: @compare_ne_and_call_with_deopt(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(24) i8* @malloc(i64 24)
; CHECK-NEXT: tail call void @f() [ "deopt"(i8* [[M]]) ]
; CHECK-NEXT: ret i1 true
;
  %m = call i8* @malloc(i64 24)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
  %cmp = icmp ne i32* %lgp, %bc
  tail call void @f() [ "deopt"(i8* %m) ]
  ret i1 %cmp
}

; Same function as above, but the global is not marked nonnull, so we cannot fold the comparison.
define i1 @compare_ne_global_maybe_null() {
; CHECK-LABEL: @compare_ne_global_maybe_null(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(24) i8* @malloc(i64 24)
; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[M]] to i32*
; CHECK-NEXT: [[LGP:%.*]] = load i32*, i32** @gp, align 8
; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32* [[LGP]], [[BC]]
; CHECK-NEXT: tail call void @f() [ "deopt"(i8* [[M]]) ]
; CHECK-NEXT: ret i1 [[CMP]]
;
  %m = call i8* @malloc(i64 24)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp
  %cmp = icmp ne i32* %lgp, %bc
  tail call void @f() [ "deopt"(i8* %m) ]
  ret i1 %cmp
}

; FIXME: The comparison should fold to false since %m escapes (call to function escape)
; after the comparison.
declare void @escape(i8*)

define i1 @compare_and_call_after() {
; CHECK-LABEL: @compare_and_call_after(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(24) i8* @malloc(i64 24)
; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[M]] to i32*
; CHECK-NEXT: [[LGP:%.*]] = load i32*, i32** @gp, align 8, !nonnull !0
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32* [[LGP]], [[BC]]
; CHECK-NEXT: br i1 [[CMP]], label [[ESCAPE_CALL:%.*]], label [[JUST_RETURN:%.*]]
; CHECK: escape_call:
; CHECK-NEXT: call void @escape(i8* [[M]])
; CHECK-NEXT: ret i1 true
; CHECK: just_return:
; CHECK-NEXT: ret i1 [[CMP]]
;
  %m = call i8* @malloc(i64 24)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
  %cmp = icmp eq i32* %bc, %lgp
  br i1 %cmp, label %escape_call, label %just_return

escape_call:
  call void @escape(i8* %m)
  ret i1 true

just_return:
  ret i1 %cmp
}

define i1 @compare_distinct_mallocs() {
; CHECK-LABEL: @compare_distinct_mallocs(
; CHECK-NEXT: ret i1 false
;
  %m = call i8* @malloc(i64 4)
  %n = call i8* @malloc(i64 4)
  %cmp = icmp eq i8* %m, %n
  ret i1 %cmp
}

; The compare is folded to true since the compare folding looks through bitcasts.
; The call to malloc and the bitcast instructions are then elided since there are
; no uses of the malloc.
define i1 @compare_samepointer_under_bitcast() {
; CHECK-LABEL: @compare_samepointer_under_bitcast(
; CHECK-NEXT: ret i1 true
;
  %m = call i8* @malloc(i64 4)
  %bc = bitcast i8* %m to i32*
  %bcback = bitcast i32* %bc to i8*
  %cmp = icmp eq i8* %m, %bcback
  ret i1 %cmp
}

; The compare is folded to true since the compare folding looks through bitcasts.
; The malloc call for %m cannot be elided since it is used in the call to function f.
define i1 @compare_samepointer_escaped() {
; CHECK-LABEL: @compare_samepointer_escaped(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: call void @f() [ "deopt"(i8* [[M]]) ]
; CHECK-NEXT: ret i1 true
;
  %m = call i8* @malloc(i64 4)
  %bc = bitcast i8* %m to i32*
  %bcback = bitcast i32* %bc to i8*
  %cmp = icmp eq i8* %m, %bcback
  call void @f() [ "deopt"(i8* %m) ]
  ret i1 %cmp
}

; Technically, we can fold the %cmp2 comparison even though %m escapes through
; the ret statement, since `ret` terminates the function and we cannot reach the
; cmp from the ret.
; FIXME: Folding this %cmp2 when %m escapes through ret could be an issue with
; cross-threading data dependencies since we do not make the distinction between
; atomic and non-atomic loads in capture tracking.
define i8* @compare_ret_escape(i8* %c) {
; CHECK-LABEL: @compare_ret_escape(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[N:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[N]], [[C:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[RETST:%.*]], label [[CHK:%.*]]
; CHECK: retst:
; CHECK-NEXT: ret i8* [[M]]
; CHECK: chk:
; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[M]] to i32*
; CHECK-NEXT: [[LGP:%.*]] = load i32*, i32** @gp, align 8, !nonnull !0
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32* [[LGP]], [[BC]]
; CHECK-NEXT: br i1 [[CMP2]], label [[RETST]], label [[CHK2:%.*]]
; CHECK: chk2:
; CHECK-NEXT: ret i8* [[N]]
;
  %m = call i8* @malloc(i64 4)
  %n = call i8* @malloc(i64 4)
  %cmp = icmp eq i8* %n, %c
  br i1 %cmp, label %retst, label %chk

retst:
  ret i8* %m

chk:
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
  %cmp2 = icmp eq i32* %bc, %lgp
  br i1 %cmp2, label %retst, label %chk2

chk2:
  ret i8* %n
}

; The malloc call for %m cannot be elided since it is used in the call to function f.
; However, the cmp can be folded to true since %n doesn't escape and %m, %n are
; distinct allocations.
define i1 @compare_distinct_pointer_escape() {
; CHECK-LABEL: @compare_distinct_pointer_escape(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: tail call void @f() [ "deopt"(i8* [[M]]) ]
; CHECK-NEXT: ret i1 true
;
  %m = call i8* @malloc(i64 4)
  %n = call i8* @malloc(i64 4)
  tail call void @f() [ "deopt"(i8* %m) ]
  %cmp = icmp ne i8* %m, %n
  ret i1 %cmp
}

; The next block of tests demonstrates a very subtle correctness requirement.
; We can generally assume any *single* heap layout we choose for the result of
; a malloc call, but we cannot simultaneously assume two different ones. As a
; result, we must make sure that we only fold conditions if we can ensure that
; we fold *all* potentially address-capturing compares the same way. This is
; the same point that applies to allocas, applied to noalias/malloc.
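
; Illustrative C-level sketch of the hazard described above (added commentary,
; not part of the original test; hidden_inttoptr is the helper declared below):
;   char *m = malloc(4);
;   bool a = (m == (char *)2048);        /* forged pointer, visible to instcombine */
;   bool b = (m == hidden_inttoptr());   /* same kind of forged pointer, hidden    */
; Folding only 'a' to false while 'b' is left to observe the real placement of m
; would amount to assuming two different heap layouts for the same allocation.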

; These two functions represent either a) forging a pointer via inttoptr or
; b) indexing off an adjacent allocation. In either case, the operation is
; obscured by an uninlined helper and not visible to instcombine.
declare i8* @hidden_inttoptr()
declare i8* @hidden_offset(i8* %other)

; FIXME: Missed opportunity
define i1 @ptrtoint_single_cmp() {
; CHECK-LABEL: @ptrtoint_single_cmp(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[M]], inttoptr (i64 2048 to i8*)
; CHECK-NEXT: ret i1 [[CMP]]
;
  %m = call i8* @malloc(i64 4)
  %rhs = inttoptr i64 2048 to i8*
  %cmp = icmp eq i8* %m, %rhs
  ret i1 %cmp
}

define i1 @offset_single_cmp() {
; CHECK-LABEL: @offset_single_cmp(
; CHECK-NEXT: ret i1 false
;
  %m = call i8* @malloc(i64 4)
  %n = call i8* @malloc(i64 4)
  %rhs = getelementptr i8, i8* %n, i32 4
  %cmp = icmp eq i8* %m, %rhs
  ret i1 %cmp
}

declare void @witness(i1, i1)

define void @neg_consistent_fold1() {
; CHECK-LABEL: @neg_consistent_fold1(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[RHS2:%.*]] = call i8* @hidden_inttoptr()
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8* [[M]], inttoptr (i64 2048 to i8*)
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8* [[M]], [[RHS2]]
; CHECK-NEXT: call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT: ret void
;
  %m = call i8* @malloc(i64 4)
  %rhs = inttoptr i64 2048 to i8*
  %rhs2 = call i8* @hidden_inttoptr()
  %cmp1 = icmp eq i8* %m, %rhs
  %cmp2 = icmp eq i8* %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

define void @neg_consistent_fold2() {
; CHECK-LABEL: @neg_consistent_fold2(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[N:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[RHS:%.*]] = getelementptr i8, i8* [[N]], i64 4
; CHECK-NEXT: [[RHS2:%.*]] = call i8* @hidden_offset(i8* [[N]])
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8* [[M]], [[RHS]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8* [[M]], [[RHS2]]
; CHECK-NEXT: call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT: ret void
;
  %m = call i8* @malloc(i64 4)
  %n = call i8* @malloc(i64 4)
  %rhs = getelementptr i8, i8* %n, i32 4
  %rhs2 = call i8* @hidden_offset(i8* %n)
  %cmp1 = icmp eq i8* %m, %rhs
  %cmp2 = icmp eq i8* %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

define void @neg_consistent_fold3() {
; CHECK-LABEL: @neg_consistent_fold3(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[M]] to i32*
; CHECK-NEXT: [[LGP:%.*]] = load i32*, i32** @gp, align 8
; CHECK-NEXT: [[RHS2:%.*]] = call i8* @hidden_inttoptr()
; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i32* [[LGP]], [[BC]]
; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8* [[M]], [[RHS2]]
; CHECK-NEXT: call void @witness(i1 [[CMP1]], i1 [[CMP2]])
; CHECK-NEXT: ret void
;
  %m = call i8* @malloc(i64 4)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8
  %rhs2 = call i8* @hidden_inttoptr()
  %cmp1 = icmp eq i32* %bc, %lgp
  %cmp2 = icmp eq i8* %m, %rhs2
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

; FIXME: This appears correct, but the current implementation relies
; on visiting both cmps in the same pass. We may have a simplification order
; under which one is missed, and that would be a bug.
define void @neg_consistent_fold4() {
; CHECK-LABEL: @neg_consistent_fold4(
; CHECK-NEXT: call void @witness(i1 false, i1 false)
; CHECK-NEXT: ret void
;
  %m = call i8* @malloc(i64 4)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8
  %cmp1 = icmp eq i32* %bc, %lgp
  %cmp2 = icmp eq i32* %bc, %lgp
  call void @witness(i1 %cmp1, i1 %cmp2)
  ret void
}

declare void @unknown(i8*)

; Points out that a nocapture call can't cause a consistent result issue
; as it is (by assumption) not able to contain a comparison which might
; capture the address.
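
; Illustrative C shape for the tests below (added commentary, not part of the
; original test; 'unknown' stands for an external call that cannot capture m):
;   char *m = malloc(4);
;   unknown(m);       /* nocapture: the callee cannot stash or compare m       */
;   return m == rhs;  /* so folding this compare cannot become inconsistent    */
;                     /* with a compare hidden inside the call                 */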

define i1 @consistent_nocapture_inttoptr() {
; CHECK-LABEL: @consistent_nocapture_inttoptr(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: call void @unknown(i8* nocapture [[M]])
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[M]], inttoptr (i64 2048 to i8*)
; CHECK-NEXT: ret i1 [[CMP]]
;
  %m = call i8* @malloc(i64 4)
  call void @unknown(i8* nocapture %m)
  %rhs = inttoptr i64 2048 to i8*
  %cmp = icmp eq i8* %m, %rhs
  ret i1 %cmp
}

define i1 @consistent_nocapture_offset() {
; CHECK-LABEL: @consistent_nocapture_offset(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: call void @unknown(i8* nocapture [[M]])
; CHECK-NEXT: ret i1 false
;
  %m = call i8* @malloc(i64 4)
  call void @unknown(i8* nocapture %m)
  %n = call i8* @malloc(i64 4)
  %rhs = getelementptr i8, i8* %n, i32 4
  %cmp = icmp eq i8* %m, %rhs
  ret i1 %cmp
}

define i1 @consistent_nocapture_through_global() {
; CHECK-LABEL: @consistent_nocapture_through_global(
; CHECK-NEXT: [[M:%.*]] = call dereferenceable_or_null(4) i8* @malloc(i64 4)
; CHECK-NEXT: call void @unknown(i8* nocapture [[M]])
; CHECK-NEXT: ret i1 false
;
  %m = call i8* @malloc(i64 4)
  call void @unknown(i8* nocapture %m)
  %bc = bitcast i8* %m to i32*
  %lgp = load i32*, i32** @gp, align 8, !nonnull !0
  %cmp = icmp eq i32* %bc, %lgp
  ret i1 %cmp
}

; End consistent heap layout tests

; We can fold this by assuming a single heap layout
define i1 @two_nonnull_mallocs() {
; CHECK-LABEL: @two_nonnull_mallocs(
; CHECK-NEXT: ret i1 false
;
  %m = call nonnull i8* @malloc(i64 4)
  %n = call nonnull i8* @malloc(i64 4)
  %cmp = icmp eq i8* %m, %n
  ret i1 %cmp
}

; The address of %n is captured, but %m can be arranged to make
; the comparison non-equal.
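; Roughly the C shape being modeled (added commentary, not part of the original
; test):
;   char *m = malloc(4);   /* never captured                                */
;   char *n = malloc(4);   /* escapes via unknown(n)                        */
;   unknown(n);
;   return m == n;         /* m's placement can be chosen so this is false  */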
define i1 @two_nonnull_mallocs2() {
; CHECK-LABEL: @two_nonnull_mallocs2(
; CHECK-NEXT: [[N:%.*]] = call nonnull dereferenceable(4) i8* @malloc(i64 4)
; CHECK-NEXT: call void @unknown(i8* nonnull [[N]])
; CHECK-NEXT: ret i1 false
;
  %m = call nonnull i8* @malloc(i64 4)
  %n = call nonnull i8* @malloc(i64 4)
  call void @unknown(i8* %n)
  %cmp = icmp eq i8* %m, %n
  ret i1 %cmp
}

; TODO: We can fold this, but don't with the current scheme.
define i1 @two_nonnull_mallocs_hidden() {
; CHECK-LABEL: @two_nonnull_mallocs_hidden(
; CHECK-NEXT: [[M:%.*]] = call nonnull dereferenceable(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[N:%.*]] = call nonnull dereferenceable(4) i8* @malloc(i64 4)
; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i8, i8* [[M]], i64 1
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8* [[N]], i64 2
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[GEP1]], [[GEP2]]
; CHECK-NEXT: ret i1 [[CMP]]
;
  %m = call nonnull i8* @malloc(i64 4)
  %n = call nonnull i8* @malloc(i64 4)
  %gep1 = getelementptr i8, i8* %m, i32 1
  %gep2 = getelementptr i8, i8* %n, i32 2
  %cmp = icmp eq i8* %gep1, %gep2
  ret i1 %cmp
}

!0 = !{}