Fix register-dependent X86 tests.

llvm-svn: 128867
This commit is contained in:
Jakob Stoklund Olesen 2011-04-05 00:32:44 +00:00
parent 45df3dd3fe
commit bd09d45489
30 changed files with 96 additions and 76 deletions

View File

@ -1,6 +1,8 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rsi, %mm0}
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {movd %rdi, %mm1}
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | grep {paddusw %mm0, %mm1}
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+mmx | FileCheck %s
; CHECK: movd %rsi, [[MM0:%mm[0-9]+]]
; CHECK: movd %rdi, [[MM1:%mm[0-9]+]]
; CHECK: paddusw [[MM0]], [[MM1]]
@R = external global x86_mmx ; <x86_mmx*> [#uses=1]

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split
; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split -regalloc=linearscan
define i32 @main() nounwind {
bb4.i.thread:

View File

@ -1,5 +1,5 @@
; RUN: llc -mtriple=i386-apple-darwin10.0 -relocation-model=pic -asm-verbose=false \
; RUN: -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false < %s | \
; RUN: -disable-fp-elim -mattr=-sse41,-sse3,+sse2 -post-RA-scheduler=false -regalloc=linearscan < %s | \
; RUN: FileCheck %s
; rdar://6808032

View File

@ -1,11 +1,21 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | not grep pcmpeqd
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | grep orps | grep CPI0_2 | count 2
; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep pcmpeqd | count 1
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=X86-64 %s
; This testcase shouldn't need to spill the -1 value,
; so it should just use pcmpeqd to materialize an all-ones vector.
; For i386, cp load of -1 are folded.
; With -regalloc=greedy, the live range is split before spilling, so the first
; pcmpeq doesn't get folded as a constant pool load.
; I386-NOT: pcmpeqd
; I386: orps LCPI0_2, %xmm
; I386-NOT: pcmpeqd
; I386: orps LCPI0_2, %xmm
; X86-64: pcmpeqd
; X86-64-NOT: pcmpeqd
%struct.__ImageExecInfo = type <{ <4 x i32>, <4 x float>, <2 x i64>, i8*, i8*, i8*, i32, i32, i32, i32, i32 }>
%struct._cl_image_format_t = type <{ i32, i32, i32 }>
%struct._image2d_t = type <{ i8*, %struct._cl_image_format_t, i32, i32, i32, i32, i32, i32 }>

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -regalloc=linearscan | FileCheck %s
; This testcase should need to spill the -1 value on both x86-32 and x86-64,
; so it shouldn't use pcmpeqd to materialize an all-ones vector; it

View File

@ -1,9 +1,11 @@
; RUN: llc < %s | grep {addl.\$4, %ecx}
; RUN: llc < %s | not grep leal
; RUN: llc < %s | FileCheck %s
; this should not sink %1 into bb1, that would increase reg pressure.
; rdar://6399178
; CHECK: addl $4,
; CHECK-NOT: leal
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"

View File

@ -4,10 +4,10 @@
; By starting the IV at -64 instead of 0, a cmp is eliminated,
; as the flags from the add can be used directly.
; STATIC: movl $-64, %ecx
; STATIC: movl $-64, [[ECX:%e..]]
; STATIC: movl %eax, _state+76(%ecx)
; STATIC: addl $16, %ecx
; STATIC: movl [[EAX:%e..]], _state+76([[ECX]])
; STATIC: addl $16, [[ECX]]
; STATIC: jne
; In PIC mode the symbol can't be folded, so the change-compare-stride

View File

@ -4,8 +4,9 @@
; Full strength reduction wouldn't reduce register pressure, so LSR should
; stick with indexing here.
; CHECK: movaps (%{{rsi|rdx}},%rax,4), %xmm3
; CHECK: movaps %xmm3, (%{{rdi|rcx}},%rax,4)
; CHECK: movaps (%{{rsi|rdx}},%rax,4), [[X3:%xmm[0-9]+]]
; CHECK: movaps
; CHECK: [[X3]], (%{{rdi|rcx}},%rax,4)
; CHECK: addq $4, %rax
; CHECK: cmpl %eax, (%{{rdx|r8}})
; CHECK-NEXT: jg

View File

@ -1,14 +1,12 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s -check-prefix=X64
; X64: movq ({{%rsi|%rdx}}), %rax
; RUN: llc < %s -march=x86 -mattr=-sse2 | FileCheck %s -check-prefix=X32
; X32: movl 4(%eax),
; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s -check-prefix=XMM
; XMM: movsd (%eax),
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=-sse2 | FileCheck %s
; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
; This test should use GPRs to copy the mmx value, not MMX regs. Using mmx regs,
; increases the places that need to use emms.
; CHECK-NOT: %mm
; CHECK-NOT: emms
; rdar://5741668
define void @foo(<1 x i64>* %x, <1 x i64>* %y) nounwind {

View File

@ -45,8 +45,8 @@ for.end: ; preds = %for.body, %entry
; CHECK-NEXT: align
; CHECK-NEXT: BB1_2:
; CHECK-NEXT: callq
; CHECK-NEXT: incl [[BX:%ebx|%esi]]
; CHECK-NEXT: cmpl [[R14:%r14d|%edi]], [[BX]]
; CHECK-NEXT: incl [[BX:%[a-z0-9]+]]
; CHECK-NEXT: cmpl [[R14:%[a-z0-9]+]], [[BX]]
; CHECK-NEXT: movq %rax, %r{{di|cx}}
; CHECK-NEXT: jl

View File

@ -4,10 +4,10 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-darwin10.3"
; CHECK: movl %{{.*}}, (%rdi,%rdx,4)
; CHECK: movl %{{.*}}, 8(%rdi,%rdx,4)
; CHECK: movl %{{.*}}, 4(%rdi,%rdx,4)
; CHECK: movl %{{.*}}, 12(%rdi,%rdx,4)
; CHECK: movl %{{.*}}, (%rdi,[[R0:.+]],4)
; CHECK: movl %{{.*}}, 8(%rdi,[[R0]],4)
; CHECK: movl %{{.*}}, 4(%rdi,[[R0]],4)
; CHECK: movl %{{.*}}, 12(%rdi,[[R0]],4)
define void @test(i32* nocapture %array, i32 %r0) nounwind ssp noredzone {
bb.nph:

View File

@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -relocation-model=pic -disable-fp-elim | FileCheck %s -check-prefix=X86-64
; MachineLICM should be able to hoist loop invariant reload out of the loop.
; Only linear scan needs this, -regalloc=greedy sinks the spill instead.
; rdar://7233099
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
@ -68,10 +69,12 @@ bb26.preheader: ; preds = %imix_test.exit
bb23: ; preds = %imix_test.exit
unreachable
; Verify that there are no loads inside the loop.
; X86-32: %bb26.preheader
; X86-32: movl -16(%ebp),
; X86-32-NEXT: .align 4
; X86-32-NEXT: %bb28
; X86-32: .align 4
; X86-32-NOT: (%esp),
; X86-32-NOT: (%ebp),
; X86-32: jmp
bb28: ; preds = %bb28, %bb26.preheader
%counter.035 = phi i32 [ %3, %bb28 ], [ 0, %bb26.preheader ] ; <i32> [#uses=2]

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of loads added} | grep 1
; RUN: llc < %s -march=x86 -relocation-model=pic -disable-fp-elim -stats -regalloc=linearscan |& grep {Number of loads added} | grep 1
; PR3495
;
; This test may not be testing what it was supposed to test.

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -stats |& grep {Number of loads added} | grep 2
; RUN: llc < %s -march=x86 -stats |& grep {Number of register spills} | grep 1
; RUN: llc < %s -march=x86 -stats |& grep {Number of machine instrs printed} | grep 34
; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of loads added} | grep 2
; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of register spills} | grep 1
; RUN: llc < %s -march=x86 -stats -regalloc=linearscan |& grep {Number of machine instrs printed} | grep 34
; PR3495
target triple = "i386-pc-linux-gnu"

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
; XFAIL: *

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan
define i32 @main(i32 %argc, i8** %argv) nounwind {
entry:

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -pre-alloc-split | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -pre-alloc-split -regalloc=linearscan | FileCheck %s
@.str = private constant [28 x i8] c"\0A\0ADOUBLE D = %f\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
@.str1 = private constant [37 x i8] c"double to long l1 = %ld\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1]

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats -regalloc=linearscan |& \
; RUN: grep {pre-alloc-split} | count 2
define i32 @t(i32 %arg) {

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
define i32 @t(i32 %arg) {

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 2
define i32 @main(i32 %argc, i8** %argv) nounwind {

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan
target triple = "i386-apple-darwin9.5"
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -pre-alloc-split | grep {divsd 24} | count 1
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -pre-alloc-split -regalloc=linearscan | grep {divsd 24} | count 1
@current_surfaces.b = external global i1 ; <i1*> [#uses=1]

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan
@object_distance = external global double, align 8 ; <double*> [#uses=1]
@axis_slope_angle = external global double, align 8 ; <double*> [#uses=1]

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
@current_surfaces.b = external global i1 ; <i1*> [#uses=1]

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats |& \
; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
@current_surfaces.b = external global i1 ; <i1*> [#uses=1]

View File

@ -178,9 +178,9 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
%tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
ret <4 x float> %tmp27
; CHECK: test14:
; CHECK: addps %xmm1, %xmm0
; CHECK: subps %xmm1, %xmm2
; CHECK: movlhps %xmm2, %xmm0
; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]]
; CHECK: movlhps [[X2]], [[X0]]
}
define <4 x float> @test15(<4 x float>* %x, <4 x float>* %y) nounwind {

View File

@ -168,12 +168,12 @@ define internal void @t10() nounwind {
store <4 x i16> %6, <4 x i16>* @g2, align 8
ret void
; X64: t10:
; X64: pextrw $4, %xmm0, %eax
; X64: unpcklpd %xmm1, %xmm1
; X64: pshuflw $8, %xmm1, %xmm1
; X64: pinsrw $2, %eax, %xmm1
; X64: pextrw $6, %xmm0, %eax
; X64: pinsrw $3, %eax, %xmm1
; X64: pextrw $4, [[X0:%xmm[0-9]+]], %eax
; X64: unpcklpd [[X1:%xmm[0-9]+]]
; X64: pshuflw $8, [[X1]], [[X1]]
; X64: pinsrw $2, %eax, [[X1]]
; X64: pextrw $6, [[X0]], %eax
; X64: pinsrw $3, %eax, [[X1]]
}

View File

@ -153,16 +153,20 @@ bb30:
; an unconditional jump to complete a two-way conditional branch.
; CHECK: c_expand_expr_stmt:
; CHECK: jmp .LBB3_11
; CHECK-NEXT: .LBB3_9:
; CHECK-NEXT: movq 8(%rax), %rax
; CHECK-NEXT: xorb %dl, %dl
; CHECK-NEXT: movb 16(%rax), %al
; CHECK-NEXT: cmpb $16, %al
; CHECK-NEXT: je .LBB3_11
; CHECK-NEXT: cmpb $23, %al
; CHECK-NEXT: jne .LBB3_14
; CHECK-NEXT: .LBB3_11:
;
; This test only works when register allocation happens to use %rax for both
; load addresses.
;
; CHE: jmp .LBB3_11
; CHE-NEXT: .LBB3_9:
; CHE-NEXT: movq 8(%rax), %rax
; CHE-NEXT: xorb %dl, %dl
; CHE-NEXT: movb 16(%rax), %al
; CHE-NEXT: cmpb $16, %al
; CHE-NEXT: je .LBB3_11
; CHE-NEXT: cmpb $23, %al
; CHE-NEXT: jne .LBB3_14
; CHE-NEXT: .LBB3_11:
%0 = type { %struct.rtx_def* }
%struct.lang_decl = type opaque

View File

@ -6,15 +6,15 @@
; Check that lowered arguments on the stack do not overwrite each other.
; Add %in1 %p1 to a different temporary register (%eax).
; CHECK: movl [[A1:32|144]](%rsp), %eax
; CHECK: movl [[A1:32|144]](%rsp), [[R1:%e..]]
; Move param %in1 to temp register (%r10d).
; CHECK: movl [[A2:40|152]](%rsp), %r10d
; CHECK: movl [[A2:40|152]](%rsp), [[R2:%[a-z0-9]+]]
; Add %in1 %p1 to a different temporary register (%eax).
; CHECK: addl {{%edi|%ecx}}, %eax
; CHECK: addl {{%edi|%ecx}}, [[R1]]
; Move param %in2 to stack.
; CHECK: movl %r10d, [[A1]](%rsp)
; CHECK: movl [[R2]], [[A1]](%rsp)
; Move result of addition to stack.
; CHECK: movl %eax, [[A2]](%rsp)
; CHECK: movl [[R1]], [[A2]](%rsp)
; Eventually, do a TAILCALL
; CHECK: TAILCALL

View File

@ -40,10 +40,10 @@ entry:
; W64: subq %rax, %rsp
; W64: movq %rsp, %rax
; EFI: leaq 15(%rcx), %rax
; EFI: andq $-16, %rax
; EFI: leaq 15(%rcx), [[R1:%r..]]
; EFI: andq $-16, [[R1]]
; EFI: movq %rsp, [[R64:%r..]]
; EFI: subq %rax, [[R64]]
; EFI: subq [[R1]], [[R64]]
; EFI: movq [[R64]], %rsp
%r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind