; llvm-project/llvm/test/Transforms/InstCombine/X86/x86-avx2.ll

; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Verify that instcombine is able to fold identity shuffles.

; Integer variant: permd is called with the constant index vector
; <0, 1, ..., 7>. The assertions expect the call to disappear entirely so
; that the incoming argument is returned unchanged.
define <8 x i32> @identity_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @identity_test_vpermd(
; CHECK-NEXT:    ret <8 x i32> [[A0:%.*]]
;
  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
  ret <8 x i32> %a
}

; Float variant: permps is called with the constant index vector
; <0, 1, ..., 7>. The assertions expect the call to disappear entirely so
; that the incoming argument is returned unchanged.
define <8 x float> @identity_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @identity_test_vpermps(
; CHECK-NEXT:    ret <8 x float> [[A0:%.*]]
;
  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
  ret <8 x float> %a
}

; Instcombine should be able to fold the following shuffle to a builtin shufflevector
; with a shuffle mask of all zeroes.

; Integer variant: permd is called with an all-zero index vector. The
; assertions expect the intrinsic call to be replaced by a generic
; shufflevector of %a0 whose mask is zeroinitializer.
define <8 x i32> @zero_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @zero_test_vpermd(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> zeroinitializer
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
  ret <8 x i32> %a
}

; Float variant: permps is called with an all-zero index vector. The
; assertions expect the intrinsic call to be replaced by a generic
; shufflevector of %a0 whose mask is zeroinitializer.
define <8 x float> @zero_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @zero_test_vpermps(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> zeroinitializer
; CHECK-NEXT:    ret <8 x float> [[TMP1]]
;
  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
  ret <8 x float> %a
}

; Verify that instcombine is able to fold constant shuffles.

; Integer variant: permd is called with the constant lane-reversing index
; vector <7, 6, ..., 0>. The assertions expect a shufflevector of %a0 that
; uses those same indices as its mask, with no intrinsic call left.
define <8 x i32> @shuffle_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @shuffle_test_vpermd(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x i32> %a
}

; Float variant: permps is called with the constant lane-reversing index
; vector <7, 6, ..., 0>. The assertions expect a shufflevector of %a0 that
; uses those same indices as its mask, with no intrinsic call left.
define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @shuffle_test_vpermps(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    ret <8 x float> [[TMP1]]
;
  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x float> %a
}

; Verify that instcombine is able to fold constant shuffles with undef mask elements.

; Integer variant: same reversed index vector as the constant-shuffle test,
; except that element 0 of the index vector is undef. The assertions expect
; a shufflevector of %a0 whose mask keeps that undef in element 0.
define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) {
; CHECK-LABEL: @undef_test_vpermd(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x i32> %a
}

; Float variant: same reversed index vector as the constant-shuffle test,
; except that element 0 of the index vector is undef. The assertions expect
; a shufflevector of %a0 whose mask keeps that undef in element 0.
define <8 x float> @undef_test_vpermps(<8 x float> %a0) {
; CHECK-LABEL: @undef_test_vpermps(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
; CHECK-NEXT:    ret <8 x float> [[TMP1]]
;
  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
  ret <8 x float> %a
}

; Verify simplify demanded elts.

; The index vector is the identity <0, ..., 7> with the run-time value %a1
; inserted into element 0, so the permd indices are not fully constant.
; The trailing shufflevector has undef in mask element 0, i.e. it does not
; read element 0 of the permd result. The assertions expect both the
; insertelement and the intrinsic call to be gone, leaving one
; shufflevector of %a0 with mask <undef, 1, ..., 7>.
define <8 x i32> @elts_test_vpermd(<8 x i32> %a0, i32 %a1) {
; CHECK-LABEL: @elts_test_vpermd(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
;
  %1 = insertelement <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %a1, i32 0
  %2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %1)
  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i32> %3
}

; The index vector is the run-time argument %a1 with constant 0 inserted
; into element 7. The trailing shufflevector has an all-zero mask, so it
; reads only element 0 of the permps result. The assertions expect the
; insertelement to be gone (the permps call is the first instruction and is
; followed directly by the shufflevector), while the intrinsic call itself
; stays because its indices are not constant.
define <8 x float> @elts_test_vpermps(<8 x float> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @elts_test_vpermps(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> zeroinitializer
; CHECK-NEXT:    ret <8 x float> [[TMP2]]
;
  %1 = insertelement <8 x i32> %a1, i32 0, i32 7
  %2 = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %1)
  %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> zeroinitializer
  ret <8 x float> %3
}

; Declarations of the two AVX2 intrinsics called by the tests in this file.
; Both take the source vector first and an <8 x i32> vector second; the
; tests pass their permutation indices in that second operand.
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
Revert "Temporarily Revert "Add basic loop fusion pass."" The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552 2019-04-17 12:52:47 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_test_checks.py`
[InstCombine] Move target-specific inst combining For a long time, the InstCombine pass handled target specific intrinsics. Having target specific code in general passes was noted as an area for improvement for a long time. D81728 moves most target specific code out of the InstCombine pass. Applying the target specific combinations in an extra pass would probably result in inferior optimizations compared to the current fixed-point iteration, therefore the InstCombine pass resorts to newly introduced functions in the TargetTransformInfo when it encounters unknown intrinsics. The patch should not have any effect on generated code (under the assumption that code never uses intrinsics from a foreign target). This introduces three new functions: TargetTransformInfo::instCombineIntrinsic TargetTransformInfo::simplifyDemandedUseBitsIntrinsic TargetTransformInfo::simplifyDemandedVectorEltsIntrinsic A few target specific parts are left in the InstCombine folder, where it makes sense to share code. The largest left-over part in InstCombineCalls.cpp is the code shared between arm and aarch64. This allows to move about 3000 lines out from InstCombine to the targets. Differential Revision: https://reviews.llvm.org/D81728 2020-06-03 21:56:40 +08:00			`; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s`
Revert "Temporarily Revert "Add basic loop fusion pass."" The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552 2019-04-17 12:52:47 +08:00			`target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"`

			`; Verify that instcombine is able to fold identity shuffles.`

			`define <8 x i32> @identity_test_vpermd(<8 x i32> %a0) {`
			`; CHECK-LABEL: @identity_test_vpermd(`
[InstCombine] regenerate test CHECKs; NFC There's a discussion about changing a shufflevector transform in: https://bugs.llvm.org/show_bug.cgi?id=43958 It would protect against our current undef/poison behavior, and these are all tests that could be affected. 2019-11-14 23:20:48 +08:00			`; CHECK-NEXT: ret <8 x i32> [[A0:%.*]]`
Revert "Temporarily Revert "Add basic loop fusion pass."" The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552 2019-04-17 12:52:47 +08:00			`;`
			`%a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)`
			`ret <8 x i32> %a`
			`}`

			`define <8 x float> @identity_test_vpermps(<8 x float> %a0) {`
			`; CHECK-LABEL: @identity_test_vpermps(`
[InstCombine] regenerate test CHECKs; NFC There's a discussion about changing a shufflevector transform in: https://bugs.llvm.org/show_bug.cgi?id=43958 It would protect against our current undef/poison behavior, and these are all tests that could be affected. 2019-11-14 23:20:48 +08:00			`; CHECK-NEXT: ret <8 x float> [[A0:%.*]]`
Revert "Temporarily Revert "Add basic loop fusion pass."" The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552 2019-04-17 12:52:47 +08:00			`;`
			`%a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)`
			`ret <8 x float> %a`
			`}`

			`; Instcombine should be able to fold the following shuffle to a builtin shufflevector`
			`; with a shuffle mask of all zeroes.`

			`define <8 x i32> @zero_test_vpermd(<8 x i32> %a0) {`
			`; CHECK-LABEL: @zero_test_vpermd(`
[InstCombine] regenerate test CHECKs; NFC There's a discussion about changing a shufflevector transform in: https://bugs.llvm.org/show_bug.cgi?id=43958 It would protect against our current undef/poison behavior, and these are all tests that could be affected. 2019-11-14 23:20:48 +08:00			`; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> zeroinitializer`
Revert "Temporarily Revert "Add basic loop fusion pass."" The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552 2019-04-17 12:52:47 +08:00			`; CHECK-NEXT: ret <8 x i32> [[TMP1]]`
			`;`
			`%a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)`
			`ret <8 x i32> %a`
			`}`

			`define <8 x float> @zero_test_vpermps(<8 x float> %a0) {`
			`; CHECK-LABEL: @zero_test_vpermps(`
[InstCombine] regenerate test CHECKs; NFC There's a discussion about changing a shufflevector transform in: https://bugs.llvm.org/show_bug.cgi?id=43958 It would protect against our current undef/poison behavior, and these are all tests that could be affected. 2019-11-14 23:20:48 +08:00			`; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> zeroinitializer`
Revert "Temporarily Revert "Add basic loop fusion pass."" The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552 2019-04-17 12:52:47 +08:00			`; CHECK-NEXT: ret <8 x float> [[TMP1]]`
			`;`
			`%a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)`
			`ret <8 x float> %a`
			`}`

			`; Verify that instcombine is able to fold constant shuffles.`

			`define <8 x i32> @shuffle_test_vpermd(<8 x i32> %a0) {`
			`; CHECK-LABEL: @shuffle_test_vpermd(`
[InstCombine] regenerate test CHECKs; NFC There's a discussion about changing a shufflevector transform in: https://bugs.llvm.org/show_bug.cgi?id=43958 It would protect against our current undef/poison behavior, and these are all tests that could be affected. 2019-11-14 23:20:48 +08:00			`; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>`
Revert "Temporarily Revert "Add basic loop fusion pass."" The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552 2019-04-17 12:52:47 +08:00			`; CHECK-NEXT: ret <8 x i32> [[TMP1]]`
			`;`
			`%a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)`
			`ret <8 x i32> %a`
			`}`

			`define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) {`
			`; CHECK-LABEL: @shuffle_test_vpermps(`
[InstCombine] regenerate test CHECKs; NFC There's a discussion about changing a shufflevector transform in: https://bugs.llvm.org/show_bug.cgi?id=43958 It would protect against our current undef/poison behavior, and these are all tests that could be affected. 2019-11-14 23:20:48 +08:00			`; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>`
Revert "Temporarily Revert "Add basic loop fusion pass."" The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552 2019-04-17 12:52:47 +08:00			`; CHECK-NEXT: ret <8 x float> [[TMP1]]`
			`;`
			`%a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)`
			`ret <8 x float> %a`
			`}`

			`; Verify that instcombine is able to fold constant shuffles with undef mask elements.`

			`define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) {`
			`; CHECK-LABEL: @undef_test_vpermd(`
[InstCombine] regenerate test CHECKs; NFC There's a discussion about changing a shufflevector transform in: https://bugs.llvm.org/show_bug.cgi?id=43958 It would protect against our current undef/poison behavior, and these are all tests that could be affected. 2019-11-14 23:20:48 +08:00			`; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>`
Revert "Temporarily Revert "Add basic loop fusion pass."" The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552 2019-04-17 12:52:47 +08:00			`; CHECK-NEXT: ret <8 x i32> [[TMP1]]`
			`;`
			`%a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)`
			`ret <8 x i32> %a`
			`}`

			`define <8 x float> @undef_test_vpermps(<8 x float> %a0) {`
			`; CHECK-LABEL: @undef_test_vpermps(`
[InstCombine] regenerate test CHECKs; NFC There's a discussion about changing a shufflevector transform in: https://bugs.llvm.org/show_bug.cgi?id=43958 It would protect against our current undef/poison behavior, and these are all tests that could be affected. 2019-11-14 23:20:48 +08:00			`; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>`
Revert "Temporarily Revert "Add basic loop fusion pass."" The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552 2019-04-17 12:52:47 +08:00			`; CHECK-NEXT: ret <8 x float> [[TMP1]]`
			`;`
			`%a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)`
			`ret <8 x float> %a`
			`}`

			`; Verify simplify demanded elts.`

			`define <8 x i32> @elts_test_vpermd(<8 x i32> %a0, i32 %a1) {`
			`; CHECK-LABEL: @elts_test_vpermd(`
[InstCombine] remove identity shuffle simplification for mask with undefs And simultaneously enhance SimplifyDemandedVectorElts() to recognize that pattern. That preserves some of the old optimizations in IR. Given a shuffle that includes undef elements in an otherwise identity mask like: define <4 x float> @shuffle(<4 x float> %arg) { %shuf = shufflevector <4 x float> %arg, <4 x float> undef, <4 x i32> <i32 undef, i32 1, i32 2, i32 3> ret <4 x float> %shuf } We were simplifying that to the input operand. But as discussed in PR43958: https://bugs.llvm.org/show_bug.cgi?id=43958 ...that means that per-vector-element poison that would be stopped by the shuffle can now leak to the result. Also note that we still have (and there are tests for) the same transform with no undef elements in the mask (a fully-defined identity mask). I don't think there's any controversy about that case - it's a valid transform under any interpretation of shufflevector/undef/poison. Looking at a few of the diffs into codegen, I don't see any difference in final asm. So depending on your perspective, that's good (no real loss of optimization power) or bad (poison exists in the DAG, so we only partially fixed the bug). Differential Revision: https://reviews.llvm.org/D70246 2019-11-24 23:06:26 +08:00			`; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>`
			`; CHECK-NEXT: ret <8 x i32> [[TMP1]]`
Revert "Temporarily Revert "Add basic loop fusion pass."" The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552 2019-04-17 12:52:47 +08:00			`;`
			`%1 = insertelement <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %a1, i32 0`
			`%2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %1)`
			`%3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>`
			`ret <8 x i32> %3`
			`}`

			`define <8 x float> @elts_test_vpermps(<8 x float> %a0, <8 x i32> %a1) {`
			`; CHECK-LABEL: @elts_test_vpermps(`
[InstCombine] regenerate test CHECKs; NFC There's a discussion about changing a shufflevector transform in: https://bugs.llvm.org/show_bug.cgi?id=43958 It would protect against our current undef/poison behavior, and these are all tests that could be affected. 2019-11-14 23:20:48 +08:00			`; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> [[A0:%.*]], <8 x i32> [[A1:%.*]])`
Revert "Temporarily Revert "Add basic loop fusion pass."" The reversion apparently deleted the test/Transforms directory. Will be re-reverting again. llvm-svn: 358552 2019-04-17 12:52:47 +08:00			`; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> zeroinitializer`
			`; CHECK-NEXT: ret <8 x float> [[TMP2]]`
			`;`
			`%1 = insertelement <8 x i32> %a1, i32 0, i32 7`
			`%2 = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %1)`
			`%3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> zeroinitializer`
			`ret <8 x float> %3`
			`}`

			`declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)`
			`declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)`