llvm-project/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s | FileCheck %s

; This test checks that values are passed optimally between the "cmp" and
; "kor" intrinsics.
; See PR28839.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: nounwind readnone uwtable
; Compares each of %a, %b, %c and %d against %x with the 512-bit packed-float
; mask-compare intrinsic, then ORs the four i16 results together with the kor
; intrinsic as a two-level tree: (%0 | %1) | (%2 | %3).
; The assertions below expect four vcmpgeps instructions writing mask
; registers, three korw instructions combining them, and a single kmovw that
; moves the final mask into %eax before returning.
define zeroext i16 @cmp_kor_seq_16(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d, <16 x float> %x) local_unnamed_addr #0 {
; CHECK-LABEL: cmp_kor_seq_16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vcmpgeps %zmm4, %zmm0, %k0
; CHECK-NEXT:    vcmpgeps %zmm4, %zmm1, %k1
; CHECK-NEXT:    korw %k1, %k0, %k0
; CHECK-NEXT:    vcmpgeps %zmm4, %zmm2, %k1
; CHECK-NEXT:    vcmpgeps %zmm4, %zmm3, %k2
; CHECK-NEXT:    korw %k2, %k1, %k1
; CHECK-NEXT:    korw %k1, %k0, %k0
; CHECK-NEXT:    kmovw %k0, %eax
; CHECK-NEXT:    # kill: %ax<def> %ax<kill> %eax<kill>
; CHECK-NEXT:    retq
entry:
  ; Four compares against the common operand %x. The i32 13 immediate is the
  ; comparison predicate (emitted as vcmpgeps in the expected output) and
  ; i16 -1 is an all-ones value for the mask operand.
  ; NOTE(review): the trailing i32 4 is presumably the rounding/SAE control
  ; (current direction) - confirm against the intrinsic definition.
  %0 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, i16 -1, i32 4)
  %1 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %b, <16 x float> %x, i32 13, i16 -1, i32 4)
  %2 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %c, <16 x float> %x, i32 13, i16 -1, i32 4)
  %3 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %d, <16 x float> %x, i32 13, i16 -1, i32 4)
  ; Pairwise OR of the compare results, then OR of the two partial results.
  %4 = tail call i16 @llvm.x86.avx512.kor.w(i16 %0, i16 %1) #2
  %5 = tail call i16 @llvm.x86.avx512.kor.w(i16 %2, i16 %3) #2
  %6 = tail call i16 @llvm.x86.avx512.kor.w(i16 %4, i16 %5) #2
  ret i16 %6
}

; Function Attrs: nounwind readnone
; Masked packed-float compare intrinsic used by @cmp_kor_seq_16: takes two
; <16 x float> operands, an i32 immediate, an i16 operand and a further i32,
; and returns an i16 result. In this file it is always called with
; immediate 13, i16 -1 and i32 4.
declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i16, i32) #1

; Function Attrs: nounwind readnone
; 16-bit mask OR intrinsic: takes two i16 values and returns an i16. The
; assertions in @cmp_kor_seq_16 expect each call to be emitted as korw.
declare i16 @llvm.x86.avx512.kor.w(i16, i16) #1

; #0 is attached to the definition of @cmp_kor_seq_16. It carries the target
; CPU ("knl") and the feature list (including the avx512 features) that the
; test relies on, since the llc invocation itself passes no CPU/feature flags.
attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
; #1 is shared by the two intrinsic declarations.
attributes #1 = { nounwind readnone }
; #2 is attached to the three kor.w call sites inside @cmp_kor_seq_16.
attributes #2 = { nounwind }
AVX-512: Added a test for cmp intrinsics This is a new test that should explore a current suboptimal sequence in passing values between cmp and kor intrinsics. The code will be optimized in an upcoming patch. Submitted bug here: https://llvm.org/bugs/show_bug.cgi?id=28839 llvm-svn: 277954 2016-08-07 17:29:34 +08:00			`; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py`
			`; RUN: llc < %s \| FileCheck %s`

			`; This test checks optimal passing values between "cmp" and "kor" intrinsics`
			`; PR28839`

			`target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"`
			`target triple = "x86_64-unknown-linux-gnu"`

			`; Function Attrs: nounwind readnone uwtable`
			`define zeroext i16 @cmp_kor_seq_16(<16 x float> %a, <16 x float> %b, <16 x float> %c, <16 x float> %d, <16 x float> %x) local_unnamed_addr #0 {`
			`; CHECK-LABEL: cmp_kor_seq_16:`
[CodeGen] Unify MBB reference format in both MIR and debug output As part of the unification of the debug format and the MIR format, print MBB references as '%bb.5'. The MIR printer prints the IR name of a MBB only for block definitions. * find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)->getNumber\(\)/" << printMBBReference(\1)/g' find . \( -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#" << ([a-zA-Z0-9_]+)\.getNumber\(\)/" << printMBBReference(\1)/g' * find . \( -name ".txt" -o -name ".s" -o -name ".mir" -o -name ".cpp" -o -name ".h" -o -name ".ll" \) -type f -print0 \| xargs -0 sed -i '' -E 's/BB#([0-9]+)/%bb.\1/g' * grep -nr 'BB#' and fix Differential Revision: https://reviews.llvm.org/D40422 llvm-svn: 319665 2017-12-05 01:18:51 +08:00			`; CHECK: # %bb.0: # %entry`
AVX-512: Added a test for cmp intrinsics This is a new test that should explore a current suboptimal sequence in passing values between cmp and kor intrinsics. The code will be optimized in an upcoming patch. Submitted bug here: https://llvm.org/bugs/show_bug.cgi?id=28839 llvm-svn: 277954 2016-08-07 17:29:34 +08:00			`; CHECK-NEXT: vcmpgeps %zmm4, %zmm0, %k0`
AVX-512: Changed lowering of BITCAST between i1 vectors and i8/i16/i32 integer values Optimized lowering of BITCAST node. The BITCAST node can be replaced with COPY_TO_REG instead of KMOV. It allows to suppress two opposite BITCAST operations and avoid redundant "movs". Differential Revision: https://reviews.llvm.org/D23247 llvm-svn: 277958 2016-08-07 21:05:58 +08:00			`; CHECK-NEXT: vcmpgeps %zmm4, %zmm1, %k1`
			`; CHECK-NEXT: korw %k1, %k0, %k0`
Add LiveRangeShrink pass to shrink live range within BB. Summary: LiveRangeShrink pass moves instruction right after the definition with the same BB if the instruction and its operands all have more than one use. This pass is inexpensive and guarantees optimal live-range within BB. Reviewers: davidxl, wmi, hfinkel, MatzeB, andreadb Reviewed By: MatzeB, andreadb Subscribers: hiraditya, jyknight, sanjoy, skatkov, gberry, jholewinski, qcolombet, javed.absar, krytarowski, atrick, spatel, RKSimon, andreadb, MatzeB, mehdi_amini, mgorny, efriedma, davide, dberlin, llvm-commits Differential Revision: https://reviews.llvm.org/D32563 llvm-svn: 304371 2017-06-01 07:25:25 +08:00			`; CHECK-NEXT: vcmpgeps %zmm4, %zmm2, %k1`
			`; CHECK-NEXT: vcmpgeps %zmm4, %zmm3, %k2`
			`; CHECK-NEXT: korw %k2, %k1, %k1`
AVX-512: Added a test for cmp intrinsics This is a new test that should explore a current suboptimal sequence in passing values between cmp and kor intrinsics. The code will be optimized in an upcoming patch. Submitted bug here: https://llvm.org/bugs/show_bug.cgi?id=28839 llvm-svn: 277954 2016-08-07 17:29:34 +08:00			`; CHECK-NEXT: korw %k1, %k0, %k0`
			`; CHECK-NEXT: kmovw %k0, %eax`
[CodeGen] Print register names in lowercase in both MIR and debug output As part of the unification of the debug format and the MIR format, always print registers as lowercase. * Only debug printing is affected. It now follows MIR. Differential Revision: https://reviews.llvm.org/D40417 llvm-svn: 319187 2017-11-29 01:15:09 +08:00			`; CHECK-NEXT: # kill: %ax<def> %ax<kill> %eax<kill>`
AVX-512: Added a test for cmp intrinsics This is a new test that should explore a current suboptimal sequence in passing values between cmp and kor intrinsics. The code will be optimized in an upcoming patch. Submitted bug here: https://llvm.org/bugs/show_bug.cgi?id=28839 llvm-svn: 277954 2016-08-07 17:29:34 +08:00			`; CHECK-NEXT: retq`
			`entry:`
			`%0 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %x, i32 13, i16 -1, i32 4)`
			`%1 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %b, <16 x float> %x, i32 13, i16 -1, i32 4)`
			`%2 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %c, <16 x float> %x, i32 13, i16 -1, i32 4)`
			`%3 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %d, <16 x float> %x, i32 13, i16 -1, i32 4)`
			`%4 = tail call i16 @llvm.x86.avx512.kor.w(i16 %0, i16 %1) #2`
			`%5 = tail call i16 @llvm.x86.avx512.kor.w(i16 %2, i16 %3) #2`
			`%6 = tail call i16 @llvm.x86.avx512.kor.w(i16 %4, i16 %5) #2`
			`ret i16 %6`
			`}`

			`; Function Attrs: nounwind readnone`
			`declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i16, i32) #1`

			`; Function Attrs: nounwind readnone`
			`declare i16 @llvm.x86.avx512.kor.w(i16, i16) #1`

			attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
			`attributes #1 = { nounwind readnone }`
			`attributes #2 = { nounwind }`