; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instsimplify -S < %s | FileCheck %s
define i1 @test(i8 %p, i8* %pq) {
; CHECK-LABEL: @test(
; CHECK-NEXT: ret i1 false
;
%q = load i8, i8* %pq, !range !0 ; %q is known nonzero; no known bits
%1 = or i8 %p, 2 ; %1[1] = 1
%2 = and i8 %1, 254 ; %2[0] = 0, %2[1] = 1
%A = lshr i8 %2, 1 ; We should know that %A is nonzero.
%x = icmp eq i8 %A, 0
ret i1 %x
}
!0 = !{ i8 1, i8 5 }
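; Informal notes on @test (not checked output): the !range metadata above
; restricts %q to the half-open interval [1, 5), so %q is known nonzero but
; no single bit is known (1, 2, 3 and 4 share no common set bit). The fold
; itself comes from %p:
;   %1 = or i8 %p, 2     ; bit 1 of %1 is known to be 1
;   %2 = and i8 %1, 254  ; bit 0 is cleared, bit 1 is still known 1
;   %A = lshr i8 %2, 1   ; the known-1 bit moves into bit 0, so %A != 0
; Therefore "icmp eq i8 %A, 0" simplifies to the "ret i1 false" expected by
; the CHECK lines.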
; The following cases only get folded by InstCombine,
; see InstCombine/shift-shift.ll. If we wanted to,
; we could explicitly handle them in InstSimplify as well.
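; For example, in @shl_shl below the two shift amounts sum to 6 + 28 = 34,
; which is at least the 32-bit width, so every bit of %A is shifted out and
; InstCombine folds the pair to "ret i32 0". The same counting argument
; (total shift amount >= bit width, per lane for vectors) applies to the
; remaining tests in this file, even though InstSimplify keeps them as two
; shifts here.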
define i32 @shl_shl(i32 %A) {
; CHECK-LABEL: @shl_shl(
; CHECK-NEXT: [[B:%.*]] = shl i32 [[A:%.*]], 6
; CHECK-NEXT: [[C:%.*]] = shl i32 [[B]], 28
; CHECK-NEXT: ret i32 [[C]]
;
%B = shl i32 %A, 6
%C = shl i32 %B, 28
ret i32 %C
}
define <2 x i33> @shl_shl_splat_vec(<2 x i33> %A) {
; CHECK-LABEL: @shl_shl_splat_vec(
; CHECK-NEXT: [[B:%.*]] = shl <2 x i33> [[A:%.*]], <i33 5, i33 5>
; CHECK-NEXT: [[C:%.*]] = shl <2 x i33> [[B]], <i33 28, i33 28>
; CHECK-NEXT: ret <2 x i33> [[C]]
;
%B = shl <2 x i33> %A, <i33 5, i33 5>
%C = shl <2 x i33> %B, <i33 28, i33 28>
ret <2 x i33> %C
}
; FIXME
define <2 x i33> @shl_shl_vec(<2 x i33> %A) {
; CHECK-LABEL: @shl_shl_vec(
; CHECK-NEXT: [[B:%.*]] = shl <2 x i33> [[A:%.*]], <i33 6, i33 5>
; CHECK-NEXT: [[C:%.*]] = shl <2 x i33> [[B]], <i33 27, i33 28>
; CHECK-NEXT: ret <2 x i33> [[C]]
;
%B = shl <2 x i33> %A, <i33 6, i33 5>
%C = shl <2 x i33> %B, <i33 27, i33 28>
ret <2 x i33> %C
}
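; In the non-splat case above, the per-lane shift totals are 6 + 27 = 33 and
; 5 + 28 = 33, so each i33 lane also shifts out every bit; the FIXME marks it
; as a candidate for the same fold.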
define i232 @lshr_lshr(i232 %A) {
; CHECK-LABEL: @lshr_lshr(
; CHECK-NEXT: [[B:%.*]] = lshr i232 [[A:%.*]], 231
; CHECK-NEXT: [[C:%.*]] = lshr i232 [[B]], 1
; CHECK-NEXT: ret i232 [[C]]
;
%B = lshr i232 %A, 231
%C = lshr i232 %B, 1
ret i232 %C
}
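; For @lshr_lshr above, 231 + 1 = 232 is the full width of the (legal in
; LLVM, if unusual) i232 type; lshr fills with zeros, so the fully shifted
; value is 0, but InstSimplify leaves the two shifts in place as checked.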
define <2 x i32> @lshr_lshr_splat_vec(<2 x i32> %A) {
; CHECK-LABEL: @lshr_lshr_splat_vec(
; CHECK-NEXT: [[B:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 28, i32 28>
; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], <i32 4, i32 4>
; CHECK-NEXT: ret <2 x i32> [[C]]
;
%B = lshr <2 x i32> %A, <i32 28, i32 28>
%C = lshr <2 x i32> %B, <i32 4, i32 4>
ret <2 x i32> %C
}
define <2 x i32> @lshr_lshr_vec(<2 x i32> %A) {
; CHECK-LABEL: @lshr_lshr_vec(
; CHECK-NEXT: [[B:%.*]] = lshr <2 x i32> [[A:%.*]], <i32 29, i32 28>
; CHECK-NEXT: [[C:%.*]] = lshr <2 x i32> [[B]], <i32 4, i32 5>
; CHECK-NEXT: ret <2 x i32> [[C]]
;
%B = lshr <2 x i32> %A, <i32 29, i32 28>
%C = lshr <2 x i32> %B, <i32 4, i32 5>
ret <2 x i32> %C
}