[X86] Auto-generate complete checks. NFC

llvm-svn: 324295
This commit is contained in:
Craig Topper 2018-02-05 23:57:03 +00:00
parent 9c6c7c5e9b
commit 9198efceb8
1 changed files with 54 additions and 21 deletions

View File

@ -1,24 +1,30 @@
; RUN: llc < %s -o - -mcpu=generic -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s
; RUN: llc < %s -o - -mcpu=generic -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -o - -mcpu=generic -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,SSE2
; RUN: llc < %s -o - -mcpu=generic -mtriple=x86_64-apple-darwin -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE41
; For a setult against a constant, turn it into a setule and lower via psubusw.
define void @loop_no_const_reload(<2 x i64>* %in, <2 x i64>* %out, i32 %n) {
; CHECK: .short 25
; CHECK-NEXT: .short 25
; CHECK-NEXT: .short 25
; CHECK-NEXT: .short 25
; CHECK-NEXT: .short 25
; CHECK-NEXT: .short 25
; CHECK-NEXT: .short 25
; CHECK-NEXT: .short 25
; CHECK-LABEL: loop_no_const_reload:
; CHECK: psubusw
; Constant is no longer clobbered so no need to reload it in the loop.
; CHECK-NOT: movdqa {{%xmm[0-9]+}}, {{%xmm[0-9]+}}
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: je LBB0_3
; CHECK-NEXT: ## %bb.1: ## %for.body.preheader
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [25,25,25,25,25,25,25,25]
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB0_2: ## %for.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movdqa (%rdi,%rax), %xmm2
; CHECK-NEXT: psubusw %xmm0, %xmm2
; CHECK-NEXT: pcmpeqw %xmm1, %xmm2
; CHECK-NEXT: movdqa %xmm2, (%rsi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: decl %edx
; CHECK-NEXT: jne LBB0_2
; CHECK-NEXT: LBB0_3: ## %for.end
; CHECK-NEXT: retq
entry:
%cmp9 = icmp eq i32 %n, 0
br i1 %cmp9, label %for.end, label %for.body
@ -45,9 +51,27 @@ for.end: ; preds = %for.body, %entry
; Be careful if decrementing the constant would underflow.
define void @loop_const_folding_underflow(<2 x i64>* %in, <2 x i64>* %out, i32 %n) {
; CHECK-NOT: .short 25
; CHECK-LABEL: loop_const_folding_underflow:
; CHECK-NOT: psubusw
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: je LBB1_3
; CHECK-NEXT: ## %bb.1: ## %for.body.preheader
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
; CHECK-NEXT: movdqa {{.*#+}} xmm1 = [32768,32794,32794,32794,32794,32794,32794,32794]
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB1_2: ## %for.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movdqa (%rdi,%rax), %xmm2
; CHECK-NEXT: pxor %xmm0, %xmm2
; CHECK-NEXT: movdqa %xmm1, %xmm3
; CHECK-NEXT: pcmpgtw %xmm2, %xmm3
; CHECK-NEXT: movdqa %xmm3, (%rsi,%rax)
; CHECK-NEXT: addq $16, %rax
; CHECK-NEXT: decl %edx
; CHECK-NEXT: jne LBB1_2
; CHECK-NEXT: LBB1_3: ## %for.end
; CHECK-NEXT: retq
entry:
%cmp9 = icmp eq i32 %n, 0
br i1 %cmp9, label %for.end, label %for.body
@ -74,9 +98,12 @@ for.end: ; preds = %for.body, %entry
; Test for PSUBUSB
define <16 x i8> @test_ult_byte(<16 x i8> %a) {
; CHECK: .space 16,10
; CHECK-LABEL: test_ult_byte:
; CHECK: psubus
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: psubusb {{.*}}(%rip), %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpeqb %xmm1, %xmm0
; CHECK-NEXT: retq
entry:
%icmp = icmp ult <16 x i8> %a, <i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11, i8 11>
%sext = sext <16 x i1> %icmp to <16 x i8>
@ -88,7 +115,13 @@ entry:
define <8 x i16> @test_ult_register(<8 x i16> %a, <8 x i16> %b) {
; CHECK-LABEL: test_ult_register:
; CHECK-NOT: psubus
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; CHECK-NEXT: pxor %xmm2, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm2
; CHECK-NEXT: pcmpgtw %xmm0, %xmm2
; CHECK-NEXT: movdqa %xmm2, %xmm0
; CHECK-NEXT: retq
entry:
%icmp = icmp ult <8 x i16> %a, %b
%sext = sext <8 x i1> %icmp to <8 x i16>