llvm-project/llvm/test/CodeGen/X86/setcc-lowering.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
; RUN: llc -mtriple=i386-unknown-linux-gnu -mcpu=knl < %s   | FileCheck %s --check-prefix=KNL-32


; Regression test for PR25080.
;
; Verify that we don't crash during codegen due to a wrong lowering
; of a setcc node with illegal operand types and return type.
;
; The compare below works on <8 x i23>, which is not a legal vector type, and
; its <8 x i1> result is then widened to <8 x i16>. Per the commit that added
; this test (r250085), with -mattr=+avx the setcc result ends up promoted to
; v8i16 while its operands are promoted to v8i32, i.e. a 128-bit result with
; 256-bit operands.

define <8 x i16> @pr25080(<8 x i32> %a) {
; AVX-LABEL: pr25080:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vpcmpeqd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpcmpeqd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    vpsllw $15, %xmm0, %xmm0
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; KNL-32-LABEL: pr25080:
; KNL-32:       # BB#0: # %entry
; KNL-32-NEXT:    vpbroadcastd {{\.LCPI.*}}, %ymm1
; KNL-32-NEXT:    vpand %ymm1, %ymm0, %ymm0
; KNL-32-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; KNL-32-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0
; KNL-32-NEXT:    movb $15, %al
; KNL-32-NEXT:    kmovw %eax, %k1
; KNL-32-NEXT:    korw %k1, %k0, %k1
; KNL-32-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-32-NEXT:    vpmovqw %zmm0, %xmm0
; KNL-32-NEXT:    retl
entry:
  ; Narrow every lane to i23 so the compare sees an illegal element type.
  %0 = trunc <8 x i32> %a to <8 x i23>
  ; Per-lane "== 0" test; this is the vector setcc under test.
  %1 = icmp eq <8 x i23> %0, zeroinitializer
  ; Force lanes 0-3 to true; lanes 4-7 keep the compare result.
  %2 = or <8 x i1> %1, <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>
  ; Widen each i1 to i16: true becomes -1 (all ones), false becomes 0.
  %3 = sext <8 x i1> %2 to <8 x i16>
  ret <8 x i16> %3
}

; Regression test named after PR26232.
;
; A scalar signed i64 compare is splatted across a <16 x i1> vector, ANDed with
; the incoming mask %b, and the resulting 16-bit mask is tested against zero to
; decide whether to stay in the loop. In the i386 expected output the i64
; compare is carried out with a cmpl/sbbl pair on two 32-bit registers.
; NOTE(review): presumably this guards the SETCCE lowering fix for 32-bit KNL
; (D16752) - confirm against the bug report.
define void @pr26232(i64 %a, <16 x i1> %b) {
; AVX-LABEL: pr26232:
; AVX:       # BB#0: # %for_loop599.preheader
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; AVX-NEXT:    .p2align 4, 0x90
; AVX-NEXT:  .LBB1_1: # %for_loop599
; AVX-NEXT:    # =>This Inner Loop Header: Depth=1
; AVX-NEXT:    xorl %eax, %eax
; AVX-NEXT:    cmpq $65536, %rdi # imm = 0x10000
; AVX-NEXT:    setl %al
; AVX-NEXT:    vmovd %eax, %xmm3
; AVX-NEXT:    vpshufb %xmm1, %xmm3, %xmm3
; AVX-NEXT:    vpand %xmm0, %xmm3, %xmm3
; AVX-NEXT:    vpsllw $7, %xmm3, %xmm3
; AVX-NEXT:    vpand %xmm2, %xmm3, %xmm3
; AVX-NEXT:    vpcmpgtb %xmm3, %xmm1, %xmm3
; AVX-NEXT:    vpmovmskb %xmm3, %eax
; AVX-NEXT:    testw %ax, %ax
; AVX-NEXT:    jne .LBB1_1
; AVX-NEXT:  # BB#2: # %for_exit600
; AVX-NEXT:    retq
;
; KNL-32-LABEL: pr26232:
; KNL-32:       # BB#0: # %for_loop599.preheader
; KNL-32-NEXT:    pushl %esi
; KNL-32-NEXT:  .Lcfi0:
; KNL-32-NEXT:    .cfi_def_cfa_offset 8
; KNL-32-NEXT:  .Lcfi1:
; KNL-32-NEXT:    .cfi_offset %esi, -8
; KNL-32-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL-32-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL-32-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; KNL-32-NEXT:    movw $-1, %dx
; KNL-32-NEXT:    .p2align 4, 0x90
; KNL-32-NEXT:  .LBB1_1: # %for_loop599
; KNL-32-NEXT:    # =>This Inner Loop Header: Depth=1
; KNL-32-NEXT:    cmpl $65536, %ecx # imm = 0x10000
; KNL-32-NEXT:    movl %eax, %esi
; KNL-32-NEXT:    sbbl $0, %esi
; KNL-32-NEXT:    movl $0, %esi
; KNL-32-NEXT:    cmovlw %dx, %si
; KNL-32-NEXT:    kmovw %esi, %k1
; KNL-32-NEXT:    kandw %k0, %k1, %k1
; KNL-32-NEXT:    kmovw %k1, %esi
; KNL-32-NEXT:    testw %si, %si
; KNL-32-NEXT:    jne .LBB1_1
; KNL-32-NEXT:  # BB#2: # %for_exit600
; KNL-32-NEXT:    popl %esi
; KNL-32-NEXT:  .Lcfi2:
; KNL-32-NEXT:    .cfi_def_cfa_offset 4
; KNL-32-NEXT:    retl
allocas:
  ; Entry block: jump straight to the outer block.
  br label %for_test11.preheader

for_test11.preheader:                             ; preds = %for_test11.preheader, %allocas
  ; The branch condition is undef: either enter the loop or branch back here.
  br i1 undef, label %for_loop599, label %for_test11.preheader

for_loop599:                                      ; preds = %for_loop599, %for_test11.preheader
  ; Signed i64 compare of the argument against 65536; %a never changes in the
  ; loop, so the result is the same on every iteration.
  %less_i_load605_ = icmp slt i64 %a, 65536
  ; Splat the i1 into all 16 lanes: insert it at lane 0, then shuffle with an
  ; all-zero index mask so every lane reads lane 0.
  %less_i_load605__broadcast_init = insertelement <16 x i1> undef, i1 %less_i_load605_, i32 0
  %less_i_load605__broadcast = shufflevector <16 x i1> %less_i_load605__broadcast_init, <16 x i1> undef, <16 x i32> zeroinitializer
  ; Combine the splatted compare result with the incoming mask %b.
  %"oldMask&test607" = and <16 x i1> %less_i_load605__broadcast, %b
  ; Reinterpret the 16 lanes as one i16 and exit the loop once no lane is set;
  ; otherwise branch back to this block.
  %intmask.i894 = bitcast <16 x i1> %"oldMask&test607" to i16
  %res.i895 = icmp eq i16 %intmask.i894, 0
  br i1 %res.i895, label %for_exit600, label %for_loop599

for_exit600:                                      ; preds = %for_loop599
  ret void
}
Make utils/update_llc_test_checks.py note that the assertions are autogenerated. Also update existing test cases which appear to be generated by it and weren't modified (other than addition of the header) by rerunning it. llvm-svn: 253917 2015-11-24 05:33:58 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 llvm-svn: 259342 2016-02-01 15:56:09 +08:00			`; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s \| FileCheck %s --check-prefix=AVX`
			`; RUN: llc -mtriple=i386-unknown-linux-gnu -mcpu=knl < %s \| FileCheck %s --check-prefix=KNL-32`

[x86] Fix wrong lowering of vsetcc nodes (PR25080). Function LowerVSETCC (in X86ISelLowering.cpp) worked under the wrong assumption that for non-AVX512 targets, the source type and destination type of a type-legalized setcc node were always the same type. This assumption was unfortunately incorrect; the type legalizer is not always able to promote the return type of a setcc to the same type as the first operand of a setcc. In the case of a vsetcc node, the legalizer firstly checks if the first input operand has a legal type. If so, then it promotes the return type of the vsetcc to that same type. Otherwise, the return type is promoted to the 'next legal type', which, for vectors of MVT::i1 is always a 128-bit integer vector type. Example (-mattr=+avx): %0 = trunc <8 x i32> %a to <8 x i23> %1 = icmp eq <8 x i23> %0, zeroinitializer The initial selection dag for the code above is: v8i1 = setcc t5, t7, seteq:ch t5: v8i23 = truncate t2 t2: v8i32,ch = CopyFromReg t0, Register:v8i32 %vreg1 t7: v8i32 = build_vector of all zeroes. The type legalizer would firstly check if 't5' has a legal type. If so, then it would reuse that same type to promote the return type of the setcc node. Unfortunately 't5' is of illegal type v8i23, and therefore it cannot be used to promote the return type of the setcc node. Consequently, the setcc return type is promoted to v8i16. Later on, 't5' is promoted to v8i32 thus leading to the following dag node: v8i16 = setcc t32, t25, seteq:ch where t32 and t25 are now values of type v8i32. Before this patch, function LowerVSETCC would have wrongly expanded the setcc to a single X86ISD::PCMPEQ. Surprisingly, ISel was still able to match an instruction. In our case, ISel would have matched a VPCMPEQWrr: t37: v8i16 = X86ISD::VPCMPEQWrr t36, t25 However, t36 and t25 are both VR256, while the result type is instead of class VR128. 
This inconsistency ended up causing the insertion of COPY instructions like this: %vreg7<def> = COPY %vreg3; VR128:%vreg7 VR256:%vreg3 Which is an invalid full copy (not a sub register copy). Eventually, the backend would have hit an UNREACHABLE "Cannot emit physreg copy instruction" in the attempt to expand the malformed pseudo COPY instructions. This patch fixes the problem adding the missing logic in LowerVSETCC to handle the corner case of a setcc with 128-bit return type and 256-bit operand type. This problem was originally reported by Dimitry as PR25080. It has been latent for a very long time. I have added the minimal reproducible from that bugzilla as test setcc-lowering.ll. Differential Revision: http://reviews.llvm.org/D13660 llvm-svn: 250085 2015-10-13 03:22:30 +08:00
			`; Verify that we don't crash during codegen due to a wrong lowering`
			`; of a setcc node with illegal operand types and return type.`

			`define <8 x i16> @pr25080(<8 x i32> %a) {`
AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 llvm-svn: 259342 2016-02-01 15:56:09 +08:00			`; AVX-LABEL: pr25080:`
			`; AVX: # BB#0: # %entry`
			`; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0`
			`; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1`
			`; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2`
			`; AVX-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1`
			`; AVX-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0`
[X86][SSE] Optimize the truncation of vector comparison results with PACKSS We currently default to using either generic shuffles or MASK+PACKUS/PACKSS to truncate all integer vectors. For vector comparisons, we know that the result will be either all or zero bits in every element, which can be efficiently truncated by directly using PACKSS to repeatedly halve the size of each element. Due to the limited input values (-1 or 0) we don't need to account for vector element size, so for simplicity we just use the PACKSS(vXi16,vXi16) implementation in all cases. Additionally for AVX2 PACKSS of 256bit data we must perform a PERMQ shuffle to reorder the data into the correct order. I did investigate performing a single shuffle after all the PACKSS calls but the need to cross 128bit lanes makes this difficult to achieve efficiently. We avoid performing this on AVX512 as it should have better alternative truncation instructions. Differential Revision: https://reviews.llvm.org/D22814 llvm-svn: 277132 2016-07-29 18:23:10 +08:00			`; AVX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0`
AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 llvm-svn: 259342 2016-02-01 15:56:09 +08:00			`; AVX-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0`
			`; AVX-NEXT: vpsllw $15, %xmm0, %xmm0`
			`; AVX-NEXT: vpsraw $15, %xmm0, %xmm0`
			`; AVX-NEXT: vzeroupper`
			`; AVX-NEXT: retq`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`;`
			`; KNL-32-LABEL: pr25080:`
			`; KNL-32: # BB#0: # %entry`
			`; KNL-32-NEXT: vpbroadcastd {{\.LCPI.*}}, %ymm1`
			`; KNL-32-NEXT: vpand %ymm1, %ymm0, %ymm0`
			`; KNL-32-NEXT: vpxor %ymm1, %ymm1, %ymm1`
			`; KNL-32-NEXT: vpcmpeqd %zmm1, %zmm0, %k0`
			`; KNL-32-NEXT: movb $15, %al`
			`; KNL-32-NEXT: kmovw %eax, %k1`
			`; KNL-32-NEXT: korw %k1, %k0, %k1`
			`; KNL-32-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}`
			`; KNL-32-NEXT: vpmovqw %zmm0, %xmm0`
			`; KNL-32-NEXT: retl`
[x86] Fix wrong lowering of vsetcc nodes (PR25080). Function LowerVSETCC (in X86ISelLowering.cpp) worked under the wrong assumption that for non-AVX512 targets, the source type and destination type of a type-legalized setcc node were always the same type. This assumption was unfortunately incorrect; the type legalizer is not always able to promote the return type of a setcc to the same type as the first operand of a setcc. In the case of a vsetcc node, the legalizer firstly checks if the first input operand has a legal type. If so, then it promotes the return type of the vsetcc to that same type. Otherwise, the return type is promoted to the 'next legal type', which, for vectors of MVT::i1 is always a 128-bit integer vector type. Example (-mattr=+avx): %0 = trunc <8 x i32> %a to <8 x i23> %1 = icmp eq <8 x i23> %0, zeroinitializer The initial selection dag for the code above is: v8i1 = setcc t5, t7, seteq:ch t5: v8i23 = truncate t2 t2: v8i32,ch = CopyFromReg t0, Register:v8i32 %vreg1 t7: v8i32 = build_vector of all zeroes. The type legalizer would firstly check if 't5' has a legal type. If so, then it would reuse that same type to promote the return type of the setcc node. Unfortunately 't5' is of illegal type v8i23, and therefore it cannot be used to promote the return type of the setcc node. Consequently, the setcc return type is promoted to v8i16. Later on, 't5' is promoted to v8i32 thus leading to the following dag node: v8i16 = setcc t32, t25, seteq:ch where t32 and t25 are now values of type v8i32. Before this patch, function LowerVSETCC would have wrongly expanded the setcc to a single X86ISD::PCMPEQ. Surprisingly, ISel was still able to match an instruction. In our case, ISel would have matched a VPCMPEQWrr: t37: v8i16 = X86ISD::VPCMPEQWrr t36, t25 However, t36 and t25 are both VR256, while the result type is instead of class VR128. 
This inconsistency ended up causing the insertion of COPY instructions like this: %vreg7<def> = COPY %vreg3; VR128:%vreg7 VR256:%vreg3 Which is an invalid full copy (not a sub register copy). Eventually, the backend would have hit an UNREACHABLE "Cannot emit physreg copy instruction" in the attempt to expand the malformed pseudo COPY instructions. This patch fixes the problem adding the missing logic in LowerVSETCC to handle the corner case of a setcc with 128-bit return type and 256-bit operand type. This problem was originally reported by Dimitry as PR25080. It has been latent for a very long time. I have added the minimal reproducible from that bugzilla as test setcc-lowering.ll. Differential Revision: http://reviews.llvm.org/D13660 llvm-svn: 250085 2015-10-13 03:22:30 +08:00			`entry:`
			`%0 = trunc <8 x i32> %a to <8 x i23>`
			`%1 = icmp eq <8 x i23> %0, zeroinitializer`
			`%2 = or <8 x i1> %1, <i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>`
			`%3 = sext <8 x i1> %2 to <8 x i16>`
			`ret <8 x i16> %3`
			`}`
AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 llvm-svn: 259342 2016-02-01 15:56:09 +08:00
[X86] Replace undef value in flaky test D33311 exposes the flakiness in this test. Replacing the undef placed by bugpoint, makes it more interesting and robust. llvm-svn: 304168 2017-05-30 02:27:00 +08:00			`define void @pr26232(i64 %a, <16 x i1> %b) {`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; AVX-LABEL: pr26232:`
			`; AVX: # BB#0: # %for_loop599.preheader`
[X86] Replace undef value in flaky test D33311 exposes the flakiness in this test. Replacing the undef placed by bugpoint, makes it more interesting and robust. llvm-svn: 304168 2017-05-30 02:27:00 +08:00			`; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1`
[X86] Match bitcast of vxi1 to pmovmsk Summary: Add an early combine to match patterns such as: (i16 bitcast (v16i1 x)) -> (i16 movmsk (v16i8 sext (v16i1 x))) This combine needs to happen early enough before type-legalization scalarizes the result of the setcc. Reviewers: igorb, craig.topper, RKSimon Subscribers: delena, llvm-commits Differential Revision: https://reviews.llvm.org/D33311 llvm-svn: 304406 2017-06-01 19:27:57 +08:00			`; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; AVX-NEXT: .p2align 4, 0x90`
			`; AVX-NEXT: .LBB1_1: # %for_loop599`
			`; AVX-NEXT: # =>This Inner Loop Header: Depth=1`
[X86] Replace undef value in flaky test D33311 exposes the flakiness in this test. Replacing the undef placed by bugpoint, makes it more interesting and robust. llvm-svn: 304168 2017-05-30 02:27:00 +08:00			`; AVX-NEXT: xorl %eax, %eax`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; AVX-NEXT: cmpq $65536, %rdi # imm = 0x10000`
[X86] Replace undef value in flaky test D33311 exposes the flakiness in this test. Replacing the undef placed by bugpoint, makes it more interesting and robust. llvm-svn: 304168 2017-05-30 02:27:00 +08:00			`; AVX-NEXT: setl %al`
[X86] Match bitcast of vxi1 to pmovmsk Summary: Add an early combine to match patterns such as: (i16 bitcast (v16i1 x)) -> (i16 movmsk (v16i8 sext (v16i1 x))) This combine needs to happen early enough before type-legalization scalarizes the result of the setcc. Reviewers: igorb, craig.topper, RKSimon Subscribers: delena, llvm-commits Differential Revision: https://reviews.llvm.org/D33311 llvm-svn: 304406 2017-06-01 19:27:57 +08:00			`; AVX-NEXT: vmovd %eax, %xmm3`
			`; AVX-NEXT: vpshufb %xmm1, %xmm3, %xmm3`
			`; AVX-NEXT: vpand %xmm0, %xmm3, %xmm3`
			`; AVX-NEXT: vpsllw $7, %xmm3, %xmm3`
			`; AVX-NEXT: vpand %xmm2, %xmm3, %xmm3`
			`; AVX-NEXT: vpcmpgtb %xmm3, %xmm1, %xmm3`
			`; AVX-NEXT: vpmovmskb %xmm3, %eax`
			`; AVX-NEXT: testw %ax, %ax`
Regen expected tests result. NFC llvm-svn: 294866 2017-02-12 03:27:15 +08:00			`; AVX-NEXT: jne .LBB1_1`
			`; AVX-NEXT: # BB#2: # %for_exit600`
			`; AVX-NEXT: retq`
			`;`
AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 llvm-svn: 259342 2016-02-01 15:56:09 +08:00			`; KNL-32-LABEL: pr26232:`
Don't delete empty preheaders in CodeGenPrepare if it would create a critical edge Presently, CodeGenPrepare deletes all nearly empty (only phi and branch) basic blocks. This pass can delete loop preheaders which frequently creates critical edges. A preheader can be a convenient place to spill registers to the stack. If the entrance to a loop body is a critical edge, then spills may occur in the loop body rather than immediately before it. This patch protects loop preheaders from deletion in CodeGenPrepare even if they are nearly empty. Since the patch alters the CFG, it affects a large number of test cases. In most cases, the changes are merely cosmetic (basic blocks have different names or instruction orders change slightly). I am somewhat concerned about the test/CodeGen/Mips/brdelayslot.ll test case. If the loop preheader is not deleted, then the MIPS backend does not take advantage of a branch delay slot. Consequently, I would like some close review by a MIPS expert. The patch also partially subsumes D16893 from George Burgess IV. George correctly notes that CodeGenPrepare does not actually preserve the dominator tree. I think the dominator tree was usually not valid when CodeGenPrepare ran, but I am using LoopInfo to mark preheaders, so the dominator tree is now always valid before CodeGenPrepare. Author: Tom Jablin (tjablin) Reviewers: hfinkel george.burgess.iv vkalintiris dsanders kbarton cycheng http://reviews.llvm.org/D16984 llvm-svn: 265397 2016-04-05 22:06:20 +08:00			`; KNL-32: # BB#0: # %for_loop599.preheader`
AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 llvm-svn: 259342 2016-02-01 15:56:09 +08:00			`; KNL-32-NEXT: pushl %esi`
MCStreamer: Use "cfi" for CFI related temp labels. Choosing a "cfi" name makes the intent a bit clearer in an assembly dump and more importantly the assembly dumps are slightly more stable as the numbers don't move around anymore when unrelated code calls createTempSymbol() more or less often. As they are temp labels the name doesn't influence the generated object code. Differential Revision: https://reviews.llvm.org/D27244 llvm-svn: 288290 2016-12-01 07:48:26 +08:00			`; KNL-32-NEXT: .Lcfi0:`
AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 llvm-svn: 259342 2016-02-01 15:56:09 +08:00			`; KNL-32-NEXT: .cfi_def_cfa_offset 8`
MCStreamer: Use "cfi" for CFI related temp labels. Choosing a "cfi" name makes the intent a bit clearer in an assembly dump and more importantly the assembly dumps are slightly more stable as the numbers don't move around anymore when unrelated code calls createTempSymbol() more or less often. As they are temp labels the name doesn't influence the generated object code. Differential Revision: https://reviews.llvm.org/D27244 llvm-svn: 288290 2016-12-01 07:48:26 +08:00			`; KNL-32-NEXT: .Lcfi1:`
AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 llvm-svn: 259342 2016-02-01 15:56:09 +08:00			`; KNL-32-NEXT: .cfi_offset %esi, -8`
[X86] Replace undef value in flaky test D33311 exposes the flakiness in this test. Replacing the undef placed by bugpoint, makes it more interesting and robust. llvm-svn: 304168 2017-05-30 02:27:00 +08:00			`; KNL-32-NEXT: vpmovsxbd %xmm0, %zmm0`
			`; KNL-32-NEXT: vpslld $31, %zmm0, %zmm0`
			`; KNL-32-NEXT: vptestmd %zmm0, %zmm0, %k0`
AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 llvm-svn: 259342 2016-02-01 15:56:09 +08:00			`; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %eax`
			`; KNL-32-NEXT: movl {{[0-9]+}}(%esp), %ecx`
			`; KNL-32-NEXT: movw $-1, %dx`
			`; KNL-32-NEXT: .p2align 4, 0x90`
			`; KNL-32-NEXT: .LBB1_1: # %for_loop599`
			`; KNL-32-NEXT: # =>This Inner Loop Header: Depth=1`
			`; KNL-32-NEXT: cmpl $65536, %ecx # imm = 0x10000`
			`; KNL-32-NEXT: movl %eax, %esi`
			`; KNL-32-NEXT: sbbl $0, %esi`
			`; KNL-32-NEXT: movl $0, %esi`
			`; KNL-32-NEXT: cmovlw %dx, %si`
[X86] Replace undef value in flaky test D33311 exposes the flakiness in this test. Replacing the undef placed by bugpoint, makes it more interesting and robust. llvm-svn: 304168 2017-05-30 02:27:00 +08:00			`; KNL-32-NEXT: kmovw %esi, %k1`
			`; KNL-32-NEXT: kandw %k0, %k1, %k1`
			`; KNL-32-NEXT: kmovw %k1, %esi`
AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 llvm-svn: 259342 2016-02-01 15:56:09 +08:00			`; KNL-32-NEXT: testw %si, %si`
			`; KNL-32-NEXT: jne .LBB1_1`
			`; KNL-32-NEXT: # BB#2: # %for_exit600`
			`; KNL-32-NEXT: popl %esi`
[X86] Correct dwarf unwind information in function epilogue CFI instructions that set appropriate cfa offset and cfa register are now inserted in emitEpilogue() in X86FrameLowering. Majority of the changes in this patch: 1. Ensure that CFI instructions do not affect code generation. 2. Enable maintaining correct information about cfa offset and cfa register in a function when basic blocks are reordered, merged, split, duplicated. These changes are target independent and described below. Changed CFI instructions so that they: 1. are duplicable 2. are not counted as instructions when tail duplicating or tail merging 3. can be compared as equal Add information to each MachineBasicBlock about cfa offset and cfa register that are valid at its entry and exit (incoming and outgoing CFI info). Add support for updating this information when basic blocks are merged, split, duplicated, created. Add a verification pass (CFIInfoVerifier) that checks that outgoing cfa offset and register of predecessor blocks match incoming values of their successors. Incoming and outgoing CFI information is used by a late pass (CFIInstrInserter) that corrects CFA calculation rule for a basic block if needed. That means that additional CFI instructions get inserted at basic block beginning to correct the rule for calculating CFA. Having CFI instructions in function epilogue can cause incorrect CFA calculation rule for some basic blocks. This can happen if, due to basic block reordering, or the existence of multiple epilogue blocks, some of the blocks have wrong cfa offset and register values set by the epilogue block above them. Patch by Violeta Vukobrat. Differential Revision: https://reviews.llvm.org/D18046 llvm-svn: 306529 2017-06-28 18:21:17 +08:00			`; KNL-32-NEXT: .Lcfi2:`
			`; KNL-32-NEXT: .cfi_def_cfa_offset 4`
AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 llvm-svn: 259342 2016-02-01 15:56:09 +08:00			`; KNL-32-NEXT: retl`
			`allocas:`
			`br label %for_test11.preheader`

			`for_test11.preheader: ; preds = %for_test11.preheader, %allocas`
			`br i1 undef, label %for_loop599, label %for_test11.preheader`

			`for_loop599: ; preds = %for_loop599, %for_test11.preheader`
			`%less_i_load605_ = icmp slt i64 %a, 65536`
			`%less_i_load605__broadcast_init = insertelement <16 x i1> undef, i1 %less_i_load605_, i32 0`
			`%less_i_load605__broadcast = shufflevector <16 x i1> %less_i_load605__broadcast_init, <16 x i1> undef, <16 x i32> zeroinitializer`
[X86] Replace undef value in flaky test D33311 exposes the flakiness in this test. Replacing the undef placed by bugpoint, makes it more interesting and robust. llvm-svn: 304168 2017-05-30 02:27:00 +08:00			`%"oldMask&test607" = and <16 x i1> %less_i_load605__broadcast, %b`
AVX512 : Fix SETCCE lowering for KNL 32 bit. Differential Revision: http://reviews.llvm.org/D16752 llvm-svn: 259342 2016-02-01 15:56:09 +08:00			`%intmask.i894 = bitcast <16 x i1> %"oldMask&test607" to i16`
			`%res.i895 = icmp eq i16 %intmask.i894, 0`
			`br i1 %res.i895, label %for_exit600, label %for_loop599`

			`for_exit600: ; preds = %for_loop599`
			`ret void`
			`}`