[X86SchedSandyBridge] update cost of COPY to 1 cycle from 0

This matches the cost used by the other scheduling models. It is
expected to make the default machine scheduler (pre-RA scheduling)
schedule mov instructions around INLINEASM less frequently.

Suggested by Craig Topper.

Link: https://github.com/llvm/llvm-project/issues/41914

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D122350
This commit is contained in:
Nick Desaulniers 2022-05-05 11:06:09 -07:00
parent e1554ac63a
commit 18fd09ab64
15 changed files with 388 additions and 390 deletions

View File

@ -111,6 +111,7 @@ def : WriteRes<WriteStore, [SBPort23, SBPort4]>;
def : WriteRes<WriteStoreNT, [SBPort23, SBPort4]>;
def : WriteRes<WriteLoad, [SBPort23]> { let Latency = 5; }
def : WriteRes<WriteMove, [SBPort015]>;
def : InstRW<[WriteMove], (instrs COPY)>;
def : WriteRes<WriteZero, []>;
def : WriteRes<WriteVecMaskedGatherWriteback, []> { let Latency = 5; let NumMicroOps = 0; }

View File

@ -35,8 +35,8 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: movq X(%rip), %rdi
; CHECK-NEXT: addq %rbx, %r12
; CHECK-NEXT: addq %r8, %rdx
; CHECK-NEXT: bswapq %rdi
; CHECK-NEXT: addq %rbx, %rdx
; CHECK-NEXT: bswapq %rdi
; CHECK-NEXT: leaq (%r15,%r14), %rsi
; CHECK-NEXT: addq %r12, %rsi
; CHECK-NEXT: addq %r11, %rdi
@ -61,8 +61,8 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r9, %rbx
; CHECK-NEXT: addq %rbx, %r10
; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: bswapq %rcx
; CHECK-NEXT: addq %rbx, %rdi
; CHECK-NEXT: bswapq %rcx
; CHECK-NEXT: leaq (%r9,%r12), %rax
; CHECK-NEXT: addq %r10, %rax
; CHECK-NEXT: addq %r15, %rcx
@ -87,8 +87,8 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r8, %rbx
; CHECK-NEXT: addq %rbx, %rdx
; CHECK-NEXT: addq %r11, %r14
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: addq %rbx, %r14
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: leaq (%r8,%r10), %rbx
; CHECK-NEXT: addq %rdx, %rbx
; CHECK-NEXT: addq %r9, %rax
@ -113,8 +113,8 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %rsi, %rbx
; CHECK-NEXT: addq %rbx, %rdi
; CHECK-NEXT: addq %r9, %r10
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: addq %rbx, %r10
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: leaq (%rsi,%rdx), %rbx
; CHECK-NEXT: addq %rdi, %rbx
; CHECK-NEXT: addq %r8, %rax
@ -139,8 +139,8 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r11, %rbx
; CHECK-NEXT: addq %rbx, %r14
; CHECK-NEXT: addq %r8, %r15
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: addq %rbx, %r15
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: leaq (%r11,%rdi), %rbx
; CHECK-NEXT: addq %r14, %rbx
; CHECK-NEXT: addq %rsi, %rax
@ -165,8 +165,8 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r9, %rbx
; CHECK-NEXT: addq %rbx, %r10
; CHECK-NEXT: addq %rsi, %r12
; CHECK-NEXT: bswapq %rcx
; CHECK-NEXT: addq %rbx, %r12
; CHECK-NEXT: bswapq %rcx
; CHECK-NEXT: leaq (%r9,%r14), %rax
; CHECK-NEXT: addq %r10, %rax
; CHECK-NEXT: addq %r11, %rcx
@ -191,8 +191,8 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %r8, %rbx
; CHECK-NEXT: addq %rbx, %r15
; CHECK-NEXT: addq %rax, %rcx
; CHECK-NEXT: bswapq %rdx
; CHECK-NEXT: addq %rbx, %rcx
; CHECK-NEXT: bswapq %rdx
; CHECK-NEXT: leaq (%r8,%r10), %rbx
; CHECK-NEXT: addq %r15, %rbx
; CHECK-NEXT: addq %r9, %rdx
@ -217,9 +217,9 @@ define fastcc i64 @foo() nounwind {
; CHECK-NEXT: addq %rsi, %rdx
; CHECK-NEXT: addq %rdx, %r12
; CHECK-NEXT: addq %rdx, %rcx
; CHECK-NEXT: addq %r15, %rsi
; CHECK-NEXT: movq X(%rip), %rax
; CHECK-NEXT: bswapq %rax
; CHECK-NEXT: addq %r15, %rsi
; CHECK-NEXT: movq %rax, X(%rip)
; CHECK-NEXT: addq %r8, %rax
; CHECK-NEXT: addq %r12, %rsi

View File

@ -69,12 +69,12 @@ define <8 x float> @test4(<8 x double>* %p) nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: cvtpd2ps 16(%eax), %xmm1
; CHECK-NEXT: cvtpd2ps 16(%eax), %xmm2
; CHECK-NEXT: cvtpd2ps (%eax), %xmm0
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: cvtpd2ps 48(%eax), %xmm2
; CHECK-NEXT: cvtpd2ps 48(%eax), %xmm3
; CHECK-NEXT: cvtpd2ps 32(%eax), %xmm1
; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; CHECK-NEXT: retl
;
; AVX-LABEL: test4:

View File

@ -61,11 +61,11 @@ define <8 x float> @test4(<8 x double> %x) nounwind {
; CHECK-LABEL: test4:
; CHECK: # %bb.0:
; CHECK-NEXT: subl $12, %esp
; CHECK-NEXT: cvtpd2ps %xmm1, %xmm1
; CHECK-NEXT: cvtpd2ps %xmm1, %xmm3
; CHECK-NEXT: cvtpd2ps %xmm0, %xmm0
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: cvtpd2ps %xmm2, %xmm1
; CHECK-NEXT: cvtpd2ps {{[0-9]+}}(%esp), %xmm2
; CHECK-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm3[0]
; CHECK-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: addl $12, %esp
; CHECK-NEXT: retl

View File

@ -42,9 +42,9 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
; LIN-SSE4-NEXT: cltq
; LIN-SSE4-NEXT: movslq %ecx, %rcx
; LIN-SSE4-NEXT: movslq %edx, %rdx
; LIN-SSE4-NEXT: movslq %esi, %rsi
; LIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; LIN-SSE4-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; LIN-SSE4-NEXT: movslq %esi, %rax
; LIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; LIN-SSE4-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; LIN-SSE4-NEXT: retq
@ -81,9 +81,9 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
; WIN-SSE4-NEXT: cltq
; WIN-SSE4-NEXT: movslq %edx, %rdx
; WIN-SSE4-NEXT: movslq %r8d, %r8
; WIN-SSE4-NEXT: movslq %r9d, %r9
; WIN-SSE4-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; WIN-SSE4-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; WIN-SSE4-NEXT: movslq %r9d, %rax
; WIN-SSE4-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; WIN-SSE4-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; WIN-SSE4-NEXT: retq
@ -94,13 +94,13 @@ define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
; LIN32-NEXT: pushl %esi
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; LIN32-NEXT: movdqa (%edx), %xmm0
; LIN32-NEXT: pand (%ecx), %xmm0
; LIN32-NEXT: pextrd $1, %xmm0, %ecx
; LIN32-NEXT: pextrd $2, %xmm0, %edx
; LIN32-NEXT: pextrd $3, %xmm0, %esi
; LIN32-NEXT: movd %xmm0, %edi
; LIN32-NEXT: movdqa (%ecx), %xmm0
; LIN32-NEXT: pand (%eax), %xmm0
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; LIN32-NEXT: movd %xmm0, %ecx
; LIN32-NEXT: pextrd $1, %xmm0, %edx
; LIN32-NEXT: pextrd $2, %xmm0, %esi
; LIN32-NEXT: pextrd $3, %xmm0, %edi
; LIN32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; LIN32-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; LIN32-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
@ -226,22 +226,22 @@ define <4 x i64> @old(double* %p, <4 x i32>* %i, <4 x i32>* %h, i64 %f) nounwind
; LIN32-NEXT: pushl %esi
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %eax
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %edx
; LIN32-NEXT: movdqa (%edx), %xmm0
; LIN32-NEXT: pand (%ecx), %xmm0
; LIN32-NEXT: movdqa (%ecx), %xmm0
; LIN32-NEXT: pand (%eax), %xmm0
; LIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; LIN32-NEXT: movd %xmm0, %edx
; LIN32-NEXT: pextrd $1, %xmm0, %esi
; LIN32-NEXT: pextrd $2, %xmm0, %ecx
; LIN32-NEXT: pextrd $2, %xmm0, %eax
; LIN32-NEXT: pextrd $3, %xmm0, %edi
; LIN32-NEXT: andl %eax, %edx
; LIN32-NEXT: andl %eax, %esi
; LIN32-NEXT: andl %eax, %ecx
; LIN32-NEXT: andl %eax, %edi
; LIN32-NEXT: andl %ecx, %edx
; LIN32-NEXT: andl %ecx, %esi
; LIN32-NEXT: andl %ecx, %eax
; LIN32-NEXT: andl %ecx, %edi
; LIN32-NEXT: movd %esi, %xmm1
; LIN32-NEXT: movd %edx, %xmm0
; LIN32-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; LIN32-NEXT: movd %edi, %xmm2
; LIN32-NEXT: movd %ecx, %xmm1
; LIN32-NEXT: movd %eax, %xmm1
; LIN32-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; LIN32-NEXT: popl %esi
; LIN32-NEXT: popl %edi

File diff suppressed because it is too large Load Diff

View File

@ -13,8 +13,8 @@ define i16 @reassociate_muls_i16(i16 %x0, i16 %x1, i16 %x2, i16 %x3) {
; CHECK-NEXT: # kill
; CHECK-NEXT: # kill
; CHECK-NEXT: leal (%rdi,%rsi), %eax
; CHECK-NEXT: imull %ecx, %edx
; CHECK-NEXT: imull %edx, %eax
; CHECK-NEXT: imull %ecx, %eax
; CHECK-NEXT: # kill
; CHECK-NEXT: retq
%t0 = add i16 %x0, %x1
@ -29,8 +29,8 @@ define i32 @reassociate_muls_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-NEXT: # kill
; CHECK-NEXT: # kill
; CHECK-NEXT: leal (%rdi,%rsi), %eax
; CHECK-NEXT: imull %ecx, %edx
; CHECK-NEXT: imull %edx, %eax
; CHECK-NEXT: imull %ecx, %eax
; CHECK-NEXT: retq
; DEAD: ADD32rr
@ -47,8 +47,8 @@ define i64 @reassociate_muls_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
; CHECK-LABEL: reassociate_muls_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: leaq (%rdi,%rsi), %rax
; CHECK-NEXT: imulq %rcx, %rdx
; CHECK-NEXT: imulq %rdx, %rax
; CHECK-NEXT: imulq %rcx, %rax
; CHECK-NEXT: retq
%t0 = add i64 %x0, %x1
%t1 = mul i64 %x2, %t0
@ -62,10 +62,10 @@ define i64 @reassociate_muls_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
define i8 @reassociate_ands_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
; CHECK-LABEL: reassociate_ands_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: subb %sil, %dil
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: subb %sil, %al
; CHECK-NEXT: andb %dl, %al
; CHECK-NEXT: andb %cl, %al
; CHECK-NEXT: andb %dil, %al
; CHECK-NEXT: # kill
; CHECK-NEXT: retq
%t0 = sub i8 %x0, %x1
@ -79,10 +79,10 @@ define i8 @reassociate_ands_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-LABEL: reassociate_ands_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: subl %esi, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: subl %esi, %eax
; CHECK-NEXT: andl %edx, %eax
; CHECK-NEXT: andl %ecx, %eax
; CHECK-NEXT: andl %edi, %eax
; CHECK-NEXT: retq
%t0 = sub i32 %x0, %x1
%t1 = and i32 %x2, %t0
@ -93,10 +93,10 @@ define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
define i64 @reassociate_ands_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
; CHECK-LABEL: reassociate_ands_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: subq %rsi, %rdi
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: subq %rsi, %rax
; CHECK-NEXT: andq %rdx, %rax
; CHECK-NEXT: andq %rcx, %rax
; CHECK-NEXT: andq %rdi, %rax
; CHECK-NEXT: retq
%t0 = sub i64 %x0, %x1
%t1 = and i64 %x2, %t0
@ -110,10 +110,10 @@ define i64 @reassociate_ands_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
define i8 @reassociate_ors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
; CHECK-LABEL: reassociate_ors_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: subb %sil, %dil
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: subb %sil, %al
; CHECK-NEXT: orb %dl, %al
; CHECK-NEXT: orb %cl, %al
; CHECK-NEXT: orb %dil, %al
; CHECK-NEXT: # kill
; CHECK-NEXT: retq
%t0 = sub i8 %x0, %x1
@ -127,10 +127,10 @@ define i8 @reassociate_ors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
define i32 @reassociate_ors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-LABEL: reassociate_ors_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: subl %esi, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: subl %esi, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: orl %edi, %eax
; CHECK-NEXT: retq
%t0 = sub i32 %x0, %x1
%t1 = or i32 %x2, %t0
@ -141,10 +141,10 @@ define i32 @reassociate_ors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
define i64 @reassociate_ors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
; CHECK-LABEL: reassociate_ors_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: subq %rsi, %rdi
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: subq %rsi, %rax
; CHECK-NEXT: orq %rdx, %rax
; CHECK-NEXT: orq %rcx, %rax
; CHECK-NEXT: orq %rdi, %rax
; CHECK-NEXT: retq
%t0 = sub i64 %x0, %x1
%t1 = or i64 %x2, %t0
@ -158,10 +158,10 @@ define i64 @reassociate_ors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
define i8 @reassociate_xors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
; CHECK-LABEL: reassociate_xors_i8:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: subb %sil, %dil
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: subb %sil, %al
; CHECK-NEXT: xorb %dl, %al
; CHECK-NEXT: xorb %cl, %al
; CHECK-NEXT: xorb %dil, %al
; CHECK-NEXT: # kill
; CHECK-NEXT: retq
%t0 = sub i8 %x0, %x1
@ -175,10 +175,10 @@ define i8 @reassociate_xors_i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3) {
define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-LABEL: reassociate_xors_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edx, %eax
; CHECK-NEXT: subl %esi, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: subl %esi, %eax
; CHECK-NEXT: xorl %edx, %eax
; CHECK-NEXT: xorl %ecx, %eax
; CHECK-NEXT: xorl %edi, %eax
; CHECK-NEXT: retq
%t0 = sub i32 %x0, %x1
%t1 = xor i32 %x2, %t0
@ -189,10 +189,10 @@ define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
define i64 @reassociate_xors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
; CHECK-LABEL: reassociate_xors_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdx, %rax
; CHECK-NEXT: subq %rsi, %rdi
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: subq %rsi, %rax
; CHECK-NEXT: xorq %rdx, %rax
; CHECK-NEXT: xorq %rcx, %rax
; CHECK-NEXT: xorq %rdi, %rax
; CHECK-NEXT: retq
%t0 = sub i64 %x0, %x1
%t1 = xor i64 %x2, %t0

View File

@ -1143,13 +1143,13 @@ define <16 x float> @v16f32_one_step(<16 x float> %x) #1 {
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
; SANDY-NEXT: vrcpps %ymm1, %ymm4
; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vrcpps %ymm1, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1
; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1
; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1
; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1
; SANDY-NEXT: vmulps %ymm1, %ymm4, %ymm1
; SANDY-NEXT: vaddps %ymm1, %ymm4, %ymm1
; SANDY-NEXT: retq
;
; HASWELL-LABEL: v16f32_one_step:
@ -1333,13 +1333,13 @@ define <16 x float> @v16f32_two_step(<16 x float> %x) #2 {
; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
; SANDY-NEXT: vsubps %ymm0, %ymm4, %ymm0
; SANDY-NEXT: vrcpps %ymm1, %ymm3
; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vrcpps %ymm1, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3
; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3
; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2
; SANDY-NEXT: vmulps %ymm3, %ymm1, %ymm2
; SANDY-NEXT: vsubps %ymm2, %ymm4, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm3, %ymm2
; SANDY-NEXT: vaddps %ymm2, %ymm3, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1
; SANDY-NEXT: vsubps %ymm1, %ymm4, %ymm1
; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1

View File

@ -1227,14 +1227,14 @@ define <16 x float> @v16f32_one_step2(<16 x float> %x) #1 {
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm4
; SANDY-NEXT: vmulps %ymm4, %ymm0, %ymm0
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
; SANDY-NEXT: vrcpps %ymm1, %ymm3
; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vaddps %ymm0, %ymm4, %ymm0
; SANDY-NEXT: vmovaps {{.*#+}} ymm2 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1]
; SANDY-NEXT: vmulps %ymm2, %ymm3, %ymm4
; SANDY-NEXT: vrcpps %ymm1, %ymm2
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1]
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm4
; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1
; SANDY-NEXT: vsubps %ymm1, %ymm2, %ymm1
; SANDY-NEXT: vmulps %ymm1, %ymm3, %ymm1
; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1
; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
; SANDY-NEXT: vaddps %ymm1, %ymm4, %ymm1
; SANDY-NEXT: retq
;
@ -1397,13 +1397,13 @@ define <16 x float> @v16f32_one_step_2_divs(<16 x float> %x) #1 {
; SANDY-NEXT: vmulps %ymm2, %ymm0, %ymm0
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
; SANDY-NEXT: vrcpps %ymm1, %ymm4
; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vaddps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1
; SANDY-NEXT: vrcpps %ymm1, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm1
; SANDY-NEXT: vsubps %ymm1, %ymm3, %ymm1
; SANDY-NEXT: vmulps %ymm1, %ymm4, %ymm1
; SANDY-NEXT: vaddps %ymm1, %ymm4, %ymm1
; SANDY-NEXT: vmulps %ymm1, %ymm2, %ymm1
; SANDY-NEXT: vaddps %ymm1, %ymm2, %ymm1
; SANDY-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm2
; SANDY-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm3
; SANDY-NEXT: vmulps %ymm0, %ymm3, %ymm0
@ -1627,13 +1627,13 @@ define <16 x float> @v16f32_two_step2(<16 x float> %x) #2 {
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm5
; SANDY-NEXT: vmulps %ymm5, %ymm0, %ymm0
; SANDY-NEXT: vsubps %ymm0, %ymm3, %ymm0
; SANDY-NEXT: vrcpps %ymm1, %ymm3
; SANDY-NEXT: vmulps %ymm0, %ymm2, %ymm0
; SANDY-NEXT: vaddps %ymm0, %ymm5, %ymm0
; SANDY-NEXT: vmulps %ymm3, %ymm1, %ymm2
; SANDY-NEXT: vsubps %ymm2, %ymm4, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm3, %ymm2
; SANDY-NEXT: vaddps %ymm2, %ymm3, %ymm2
; SANDY-NEXT: vrcpps %ymm1, %ymm2
; SANDY-NEXT: vmulps %ymm2, %ymm1, %ymm3
; SANDY-NEXT: vsubps %ymm3, %ymm4, %ymm3
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm3
; SANDY-NEXT: vaddps %ymm3, %ymm2, %ymm2
; SANDY-NEXT: vmovaps {{.*#+}} ymm3 = [9.0E+0,1.0E+1,1.1E+1,1.2E+1,1.3E+1,1.4E+1,1.5E+1,1.6E+1]
; SANDY-NEXT: vmulps %ymm3, %ymm2, %ymm4
; SANDY-NEXT: vmulps %ymm4, %ymm1, %ymm1
@ -1811,8 +1811,8 @@ define <16 x float> @v16f32_no_step2(<16 x float> %x) #3 {
; SANDY-LABEL: v16f32_no_step2:
; SANDY: # %bb.0:
; SANDY-NEXT: vrcpps %ymm0, %ymm0
; SANDY-NEXT: vrcpps %ymm1, %ymm1
; SANDY-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; SANDY-NEXT: vrcpps %ymm1, %ymm1
; SANDY-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; SANDY-NEXT: retq
;

View File

@ -122,6 +122,8 @@ body: |
; CHECK-LABEL: name: synproxy_send_tcp_ipv6
; CHECK: liveins: $eax, $edx
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32 = COPY $edx
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32_abcd = COPY $eax
; CHECK-NEXT: [[MOV8rm:%[0-9]+]]:gr8 = MOV8rm $noreg, 1, $noreg, @csum_ipv6_magic_saddr, $noreg :: (dereferenceable load (s8) from `i8* getelementptr inbounds (%struct.in6_addr, %struct.in6_addr* @csum_ipv6_magic_saddr, i32 0, i32 0, i32 0)`)
; CHECK-NEXT: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, @csum_ipv6_magic_daddr, $noreg :: (dereferenceable load (s32) from @csum_ipv6_magic_daddr, !tbaa !4)
; CHECK-NEXT: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, @csum_ipv6_magic_proto, $noreg :: (dereferenceable load (s32) from @csum_ipv6_magic_proto, !tbaa !4)
@ -130,11 +132,9 @@ body: |
; CHECK-NEXT: MOV32mr $noreg, 1, $noreg, @csum_ipv6_magic_sum, $noreg, %2 :: (store (s32) into @csum_ipv6_magic_sum, !tbaa !4)
; CHECK-NEXT: [[MOV32rm2:%[0-9]+]]:gr32 = MOV32rm $noreg, 1, $noreg, @synproxy_send_tcp_ipv6_nskb, $noreg :: (dereferenceable load (s32) from `i8** bitcast (%struct.sk_buff** @synproxy_send_tcp_ipv6_nskb to i8**)`, !tbaa !9)
; CHECK-NEXT: OR8mi [[MOV32rm2]], 1, $noreg, 0, $noreg, 3, implicit-def dead $eflags :: (store (s8) into %ir.4), (load (s8) from %ir.4)
; CHECK-NEXT: [[COPY:%[0-9]+]]:gr32_abcd = COPY $eax
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr32 = COPY $edx
; CHECK-NEXT: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm [[COPY1]], 1, $noreg, 0, $noreg :: (load (s8) from %ir.5, !tbaa !11)
; CHECK-NEXT: [[MOV8rm1:%[0-9]+]]:gr8 = MOV8rm [[COPY]], 1, $noreg, 0, $noreg :: (load (s8) from %ir.5, !tbaa !11)
; CHECK-NEXT: MOV8mr $noreg, 1, $noreg, @synproxy_send_tcp_ipv6_fl6, $noreg, [[MOV8rm1]] :: (store (s8) into `i8* getelementptr inbounds (%struct.in6_addr, %struct.in6_addr* @synproxy_send_tcp_ipv6_fl6, i32 0, i32 0, i32 0)`, !tbaa !11)
; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[COPY]].sub_8bit
; CHECK-NEXT: [[MOVZX32rr8_:%[0-9]+]]:gr32 = MOVZX32rr8 [[COPY1]].sub_8bit
; CHECK-NEXT: $eax = COPY [[MOVZX32rr8_]]
; CHECK-NEXT: TCRETURNdi @fl6nthsecurity_skb_classify_flow, 0, csr_32, implicit $esp, implicit $ssp, implicit $eax
%1:gr32 = COPY $edx

View File

@ -16,8 +16,8 @@ define float @f32_no_daz(float %f) #0 {
; NHM-NEXT: mulss %xmm2, %xmm3
; NHM-NEXT: mulss %xmm1, %xmm2
; NHM-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; NHM-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; NHM-NEXT: mulss %xmm3, %xmm2
; NHM-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; NHM-NEXT: cmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; NHM-NEXT: andnps %xmm2, %xmm0
; NHM-NEXT: retq
@ -40,8 +40,8 @@ define <4 x float> @v4f32_no_daz(<4 x float> %f) #0 {
; NHM-NEXT: mulps %xmm2, %xmm3
; NHM-NEXT: mulps %xmm1, %xmm2
; NHM-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; NHM-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; NHM-NEXT: mulps %xmm3, %xmm2
; NHM-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; NHM-NEXT: movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
; NHM-NEXT: cmpleps %xmm0, %xmm1
; NHM-NEXT: andps %xmm2, %xmm1
@ -55,8 +55,8 @@ define <4 x float> @v4f32_no_daz(<4 x float> %f) #0 {
; SNB-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; SNB-NEXT: vmulps %xmm1, %xmm2, %xmm1
; SNB-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; SNB-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; SNB-NEXT: vmulps %xmm1, %xmm3, %xmm1
; SNB-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; SNB-NEXT: vmovaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
; SNB-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
; SNB-NEXT: vandps %xmm1, %xmm0, %xmm0
@ -126,8 +126,8 @@ define <8 x float> @v8f32_no_daz(<8 x float> %f) #0 {
; SNB-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm3
; SNB-NEXT: vmulps %ymm1, %ymm2, %ymm1
; SNB-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
; SNB-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; SNB-NEXT: vmulps %ymm1, %ymm3, %ymm1
; SNB-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; SNB-NEXT: vmovaps {{.*#+}} ymm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
; SNB-NEXT: vcmpleps %ymm0, %ymm2, %ymm0
; SNB-NEXT: vandps %ymm1, %ymm0, %ymm0

View File

@ -85,8 +85,8 @@ define float @finite_f32_estimate_ieee_ninf(float %f) #1 {
; SSE-NEXT: mulss %xmm2, %xmm3
; SSE-NEXT: mulss %xmm1, %xmm2
; SSE-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: mulss %xmm3, %xmm2
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: cmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: andnps %xmm2, %xmm0
; SSE-NEXT: retq
@ -98,8 +98,8 @@ define float @finite_f32_estimate_ieee_ninf(float %f) #1 {
; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
@ -107,16 +107,15 @@ define float @finite_f32_estimate_ieee_ninf(float %f) #1 {
; AVX512-LABEL: finite_f32_estimate_ieee_ninf:
; AVX512: # %bb.0:
; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem
; AVX512-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm1
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm2
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX512-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
%call = tail call ninf afn float @__sqrtf_finite(float %f) #2
ret float %call
@ -229,8 +228,8 @@ define float @sqrtf_check_denorms_ninf(float %x) #3 {
; SSE-NEXT: mulss %xmm2, %xmm3
; SSE-NEXT: mulss %xmm1, %xmm2
; SSE-NEXT: addss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: mulss %xmm3, %xmm2
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: cmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: andnps %xmm2, %xmm0
; SSE-NEXT: retq
@ -242,8 +241,8 @@ define float @sqrtf_check_denorms_ninf(float %x) #3 {
; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vaddss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
@ -251,16 +250,15 @@ define float @sqrtf_check_denorms_ninf(float %x) #3 {
; AVX512-LABEL: sqrtf_check_denorms_ninf:
; AVX512: # %bb.0:
; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2
; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem
; AVX512-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm1
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm2
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem
; AVX512-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
%call = tail call ninf afn float @__sqrtf_finite(float %x) #2
ret float %call
@ -290,8 +288,8 @@ define <4 x float> @sqrt_v4f32_check_denorms_ninf(<4 x float> %x) #3 {
; SSE-NEXT: mulps %xmm2, %xmm3
; SSE-NEXT: mulps %xmm1, %xmm2
; SSE-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: mulps %xmm3, %xmm2
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
; SSE-NEXT: cmpleps %xmm0, %xmm1
; SSE-NEXT: andps %xmm2, %xmm1
@ -305,8 +303,8 @@ define <4 x float> @sqrt_v4f32_check_denorms_ninf(<4 x float> %x) #3 {
; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX1-NEXT: vmulps %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vmulps %xmm1, %xmm3, %xmm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
; AVX1-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
@ -397,22 +395,21 @@ define float @f32_estimate2(float %x) #5 {
; AVX1-LABEL: f32_estimate2:
; AVX1: # %bb.0:
; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm1
; AVX1-NEXT: vandnps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: f32_estimate2:
; AVX512: # %bb.0:
; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1
; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %k1
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1}
; AVX512-NEXT: vmovaps %xmm1, %xmm0
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm2
; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vcmpltss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1
; AVX512-NEXT: vmulss %xmm2, %xmm0, %xmm0
; AVX512-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX512-NEXT: vmovss %xmm1, %xmm0, %xmm0 {%k1}
; AVX512-NEXT: retq
%sqrt = tail call fast float @llvm.sqrt.f32(float %x)
ret float %sqrt
@ -495,11 +492,11 @@ define <4 x float> @v4f32_estimate2(<4 x float> %x) #5 {
; AVX1-LABEL: v4f32_estimate2:
; AVX1: # %bb.0:
; AVX1-NEXT: vrsqrtps %xmm0, %xmm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2
; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
; AVX1-NEXT: vcmpleps %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vandps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm1
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vmovaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
; AVX1-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: v4f32_estimate2:
@ -667,11 +664,11 @@ define <16 x float> @v16f32_estimate(<16 x float> %x) #1 {
; AVX1-NEXT: vmulps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vrsqrtps %ymm1, %ymm5
; AVX1-NEXT: vmulps %ymm0, %ymm4, %ymm0
; AVX1-NEXT: vrsqrtps %ymm1, %ymm4
; AVX1-NEXT: vmulps %ymm3, %ymm4, %ymm3
; AVX1-NEXT: vmulps %ymm4, %ymm1, %ymm1
; AVX1-NEXT: vmulps %ymm4, %ymm1, %ymm1
; AVX1-NEXT: vmulps %ymm3, %ymm5, %ymm3
; AVX1-NEXT: vmulps %ymm5, %ymm1, %ymm1
; AVX1-NEXT: vmulps %ymm5, %ymm1, %ymm1
; AVX1-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: vmulps %ymm1, %ymm3, %ymm1
; AVX1-NEXT: retq
@ -799,8 +796,8 @@ define <4 x float> @div_sqrt_fabs_v4f32_fmf(<4 x float> %x, <4 x float> %y, <4 x
; SSE-NEXT: mulps %xmm3, %xmm2
; SSE-NEXT: addps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2
; SSE-NEXT: mulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE-NEXT: mulps %xmm2, %xmm3
; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: mulps %xmm2, %xmm3
; SSE-NEXT: divps %xmm1, %xmm3
; SSE-NEXT: mulps %xmm3, %xmm0
; SSE-NEXT: retq
@ -812,8 +809,8 @@ define <4 x float> @div_sqrt_fabs_v4f32_fmf(<4 x float> %x, <4 x float> %y, <4 x
; AVX1-NEXT: vmulps %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
; AVX1-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3, %xmm3
; AVX1-NEXT: vmulps %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vmulps %xmm2, %xmm3, %xmm2
; AVX1-NEXT: vdivps %xmm1, %xmm2, %xmm1
; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
@ -827,8 +824,8 @@ define <4 x float> @div_sqrt_fabs_v4f32_fmf(<4 x float> %x, <4 x float> %y, <4 x
; AVX512-NEXT: vmulps %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm3 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0]
; AVX512-NEXT: vaddps %xmm3, %xmm2, %xmm2
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN]
; AVX512-NEXT: vmulps %xmm2, %xmm4, %xmm2
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm3 = [NaN,NaN,NaN,NaN]
; AVX512-NEXT: vandps %xmm3, %xmm1, %xmm1
; AVX512-NEXT: vdivps %xmm1, %xmm2, %xmm1
; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm0
@ -845,16 +842,16 @@ define <4 x float> @div_sqrt_fabs_v4f32_fmf(<4 x float> %x, <4 x float> %y, <4 x
define double @div_sqrt_fabs_f64(double %x, double %y, double %z) {
; SSE-LABEL: div_sqrt_fabs_f64:
; SSE: # %bb.0:
; SSE-NEXT: sqrtsd %xmm2, %xmm2
; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT: sqrtsd %xmm2, %xmm2
; SSE-NEXT: mulsd %xmm2, %xmm1
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: div_sqrt_fabs_f64:
; AVX: # %bb.0:
; AVX-NEXT: vsqrtsd %xmm2, %xmm2, %xmm2
; AVX-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vsqrtsd %xmm2, %xmm2, %xmm2
; AVX-NEXT: vmulsd %xmm1, %xmm2, %xmm1
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq

View File

@ -914,8 +914,8 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
; SSE-NEXT: addpd %xmm6, %xmm2
; SSE-NEXT: addpd %xmm7, %xmm3
; SSE-NEXT: addpd %xmm5, %xmm1
; SSE-NEXT: addpd %xmm3, %xmm1
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT: addpd %xmm3, %xmm1
; SSE-NEXT: addpd %xmm2, %xmm4
; SSE-NEXT: addpd %xmm1, %xmm4
; SSE-NEXT: movapd %xmm4, %xmm1

View File

@ -648,8 +648,8 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
; SSE-NEXT: mulpd %xmm6, %xmm2
; SSE-NEXT: mulpd %xmm7, %xmm3
; SSE-NEXT: mulpd %xmm5, %xmm1
; SSE-NEXT: mulpd %xmm3, %xmm1
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm4
; SSE-NEXT: mulpd %xmm3, %xmm1
; SSE-NEXT: mulpd %xmm2, %xmm4
; SSE-NEXT: mulpd %xmm1, %xmm4
; SSE-NEXT: movapd %xmm4, %xmm1

View File

@ -3253,9 +3253,9 @@ define void @PR45604(<32 x i16>* %dst, <8 x i16>* %src) {
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[0,1],zero,zero,zero,zero,zero,zero,xmm1[2,3],zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,0,0,0,11,0,0,0,0,0,0,0,11,0,0,0]
; SSSE3-NEXT: por %xmm2, %xmm1
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[4,5],zero,zero,zero,zero,zero,zero,xmm3[6,7],zero,zero,zero,zero,zero,zero
; SSSE3-NEXT: por %xmm2, %xmm1
; SSSE3-NEXT: por %xmm2, %xmm3
; SSSE3-NEXT: movdqa %xmm0, %xmm4
; SSSE3-NEXT: pshufb {{.*#+}} xmm4 = xmm4[8,9],zero,zero,zero,zero,zero,zero,xmm4[10,11],zero,zero,zero,zero,zero,zero