; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE4
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefix=AVX2
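
; PR45808: select between %0 and %1 on a signed greater-than compare whose
; result has only its first two lanes inverted
; (xor with <i1 true, i1 true, i1 false, i1 false>).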
define <4 x i64> @PR45808(<4 x i64> %0, <4 x i64> %1) {
; SSE2-LABEL: PR45808:
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [2147483648,2147483648]
; SSE2-NEXT: movdqa %xmm3, %xmm9
; SSE2-NEXT: pxor %xmm4, %xmm9
; SSE2-NEXT: movdqa %xmm1, %xmm6
; SSE2-NEXT: pxor %xmm4, %xmm6
; SSE2-NEXT: movdqa %xmm6, %xmm8
; SSE2-NEXT: pcmpgtd %xmm9, %xmm8
; SSE2-NEXT: movdqa %xmm2, %xmm7
; SSE2-NEXT: pxor %xmm4, %xmm7
; SSE2-NEXT: pxor %xmm0, %xmm4
; SSE2-NEXT: movdqa %xmm4, %xmm5
; SSE2-NEXT: pcmpgtd %xmm7, %xmm5
; SSE2-NEXT: movdqa %xmm5, %xmm10
; SSE2-NEXT: shufps {{.*#+}} xmm10 = xmm10[0,2],xmm8[0,2]
; SSE2-NEXT: pcmpeqd %xmm9, %xmm6
; SSE2-NEXT: pcmpeqd %xmm7, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,3],xmm6[1,3]
; SSE2-NEXT: andps %xmm10, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,3],xmm8[1,3]
; SSE2-NEXT: orps %xmm4, %xmm5
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[2,1,3,3]
; SSE2-NEXT: psllq $63, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
; SSE2-NEXT: pand %xmm4, %xmm1
; SSE2-NEXT: pandn %xmm3, %xmm4
; SSE2-NEXT: por %xmm4, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm5[0,1,1,3]
; SSE2-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE2-NEXT: psllq $63, %xmm3
; SSE2-NEXT: psrad $31, %xmm3
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE2-NEXT: pand %xmm3, %xmm0
; SSE2-NEXT: pandn %xmm2, %xmm3
; SSE2-NEXT: por %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSE4-LABEL: PR45808:
; SSE4: # %bb.0:
; SSE4-NEXT: movdqa %xmm0, %xmm4
; SSE4-NEXT: movdqa %xmm1, %xmm0
; SSE4-NEXT: pcmpgtq %xmm3, %xmm0
; SSE4-NEXT: movdqa %xmm4, %xmm5
; SSE4-NEXT: pcmpgtq %xmm2, %xmm5
; SSE4-NEXT: pshufd {{.*#+}} xmm5 = xmm5[0,2,2,3]
; SSE4-NEXT: pcmpeqd %xmm6, %xmm6
; SSE4-NEXT: pxor %xmm5, %xmm6
; SSE4-NEXT: psllq $63, %xmm0
; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm3
; SSE4-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm6[0],zero,xmm6[1],zero
; SSE4-NEXT: psllq $63, %xmm0
; SSE4-NEXT: blendvpd %xmm0, %xmm4, %xmm2
; SSE4-NEXT: movapd %xmm2, %xmm0
; SSE4-NEXT: movapd %xmm3, %xmm1
; SSE4-NEXT: retq
;
; AVX1-LABEL: PR45808:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
; AVX1-NEXT: vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm3
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX1-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR45808:
; AVX2: # %bb.0:
; AVX2-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm2
; AVX2-NEXT: vpxor {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2-NEXT: vblendvpd %ymm2, %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
%3 = icmp sgt <4 x i64> %0, %1
%4 = xor <4 x i1> %3, <i1 true, i1 true, i1 false, i1 false>
%5 = select <4 x i1> %4, <4 x i64> %0, <4 x i64> %1
ret <4 x i64> %5
}