Enable MI Sched for x86.

This changes the SelectionDAG scheduling preference to source
order. Soon, the SelectionDAG scheduler can be bypassed saving
a nice chunk of compile time.

Performance differences that result from this change are often a
consequence of register coalescing. The register coalescer is far from
perfect. Bugs can be filed for deficiencies.

On x86 SandyBridge/Haswell, the source order schedule is often
preserved, particularly for small blocks.

Register pressure is generally improved over the SD scheduler's ILP
mode. However, we are still able to handle large blocks that require
latency hiding, unlike the SD scheduler's BURR mode. MI scheduler also
attempts to discover the critical path in single-block loops and
adjust heuristics accordingly.

The MI scheduler relies on the new machine model. This is currently
unimplemented for AVX, so we may not be generating the best code yet.

Unit tests are updated so they don't depend on SD scheduling heuristics.

llvm-svn: 192750
This commit is contained in:
Andrew Trick 2013-10-15 23:33:07 +00:00
parent a6c38a32a9
commit e97d8d6dde
67 changed files with 346 additions and 285 deletions

View File

@ -383,11 +383,14 @@ public:
/// memset with zero passed as the second argument. Otherwise it
/// returns null.
const char *getBZeroEntry() const;
/// This function returns true if the target has sincos() routine in its
/// compiler runtime or math libraries.
bool hasSinCos() const;
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const LLVM_OVERRIDE { return true; }
/// enablePostRAScheduler - run for Atom optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,

View File

@ -1,7 +1,10 @@
; REQUIRES: asserts
; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \
; RUN: grep asm-printer | grep 14
; RUN: grep asm-printer | grep 16
;
; It's possible to schedule this in 14 instructions by avoiding
; callee-save registers, but the scheduler isn't currently that
; conervative with registers.
@size20 = external global i32 ; <i32*> [#uses=1]
@in5 = external global i8* ; <i8**> [#uses=1]
@ -21,4 +24,3 @@ define i32 @compare(i8* %a, i8* %b) nounwind {
}
declare i32 @memcmp(i8*, i8*, i32)

View File

@ -13,10 +13,10 @@ define float @foo(float %x) nounwind {
; CHECK: mulss
; CHECK: mulss
; CHECK: addss
; CHECK: mulss
; CHECK: mulss
; CHECK: addss
; CHECK: mulss
; CHECK: addss
; CHECK: addss
; CHECK: ret
}

View File

@ -17,9 +17,9 @@ bb4: ; preds = %bb.i, %bb26, %bb4, %entry
; CHECK: %bb4
; CHECK: xorl
; CHECK: callq
; CHECK: xorl
; CHECK: xorl
; CHECK: movq
; CHECK: xorl
; CHECK: xorl
%0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; <i32> [#uses=0]
%ins = or i64 %p, 2097152 ; <i64> [#uses=1]

View File

@ -1,9 +1,9 @@
; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s
; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt -enable-misched=false < %s | FileCheck %s
; Check that lowered argumens do not overwrite the return address before it is moved.
; Bug 6225
;
; If a call is a fastcc tail call and tail call optimization is enabled, the
; caller frame is replaced by the callee frame. This can require that arguments are
; caller frame is replaced by the callee frame. This can require that arguments are
; placed on the former return address stack slot. Special care needs to be taken
; taken that the return address is moved / or stored in a register before
; lowering of arguments potentially overwrites the value.
@ -51,5 +51,3 @@ false:
tail call fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind
ret void
}

View File

@ -19,8 +19,8 @@ entry:
}
; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip)
; CHECK: movb 38(%rsp), [[R0:%.+]]
; CHECK: movb 8(%rsp), [[R1:%.+]]
; CHECK: movb [[R1]], 8(%rsp)
; CHECK: movb [[R0]], 38(%rsp)
; CHECK: movb (%rsp), [[R1:%.+]]
; CHECK: movb 30(%rsp), [[R0:%.+]]
; CHECK: movb [[R1]], (%rsp)
; CHECK: movb [[R0]], 30(%rsp)
; CHECK: callq ___stack_chk_fail

View File

@ -16,8 +16,8 @@ target triple = "x86_64-unknown-linux-gnu"
; CHECK: main
define i32 @main() nounwind uwtable {
entry:
; CHECK: pmovsxbq j(%rip), %
; CHECK: pmovsxbq i(%rip), %
; CHECK: pmovsxbq j(%rip), %
%0 = load <2 x i8>* @i, align 8
%1 = load <2 x i8>* @j, align 8
%div = sdiv <2 x i8> %1, %0
@ -25,4 +25,3 @@ entry:
ret i32 0
; CHECK: ret
}

View File

@ -5,8 +5,8 @@
; It's hard to test for the ISEL condition because CodeGen optimizes
; away the bugpointed code. Just ensure the basics are still there.
;CHECK-LABEL: func:
;CHECK: vxorps
;CHECK: vinsertf128
;CHECK: vpxor
;CHECK: vinserti128
;CHECK: vpshufd
;CHECK: vpshufd
;CHECK: vmulps

View File

@ -34,7 +34,8 @@ entry:
; 64BIT-LABEL: t2:
; 64BIT-NOT: movw %si, %ax
; 64BIT: leal -1(%rsi), %eax
; 64BIT: decl %eax
; 64BIT: movzwl %ax
%0 = icmp eq i16 %k, %c ; <i1> [#uses=1]
%1 = add i16 %k, -1 ; <i16> [#uses=3]
br i1 %0, label %bb, label %bb1
@ -58,7 +59,7 @@ entry:
; 64BIT-LABEL: t3:
; 64BIT-NOT: movw %si, %ax
; 64BIT: leal 2(%rsi), %eax
; 64BIT: addl $2, %eax
%0 = add i16 %k, 2 ; <i16> [#uses=3]
%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]
br i1 %1, label %bb, label %bb1
@ -81,7 +82,7 @@ entry:
; 64BIT-LABEL: t4:
; 64BIT-NOT: movw %si, %ax
; 64BIT: leal (%rsi,%rdi), %eax
; 64BIT: addl %edi, %eax
%0 = add i16 %k, %c ; <i16> [#uses=3]
%1 = icmp eq i16 %k, %c ; <i1> [#uses=1]
br i1 %1, label %bb, label %bb1

View File

@ -4,8 +4,8 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
;YESCOLOR: subq $136, %rsp
;NOCOLOR: subq $264, %rsp
;YESCOLOR: subq $144, %rsp
;NOCOLOR: subq $272, %rsp
define i32 @myCall_w2(i32 %in) {
entry:

View File

@ -1,16 +1,16 @@
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-STATIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-PIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-STATIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-PIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-STATIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-PIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-STATIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC
; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-PIC
@src = external global [131072 x i32]
@dst = external global [131072 x i32]

View File

@ -9,7 +9,7 @@ define i32 @test1(i32 inreg %a) nounwind {
%b = add i32 %a, 128
ret i32 %b
; X32: subl $-128, %eax
; X64: subl $-128,
; X64: subl $-128,
}
define i64 @test2(i64 inreg %a) nounwind {
%b = add i64 %a, 2147483648
@ -20,7 +20,7 @@ define i64 @test2(i64 inreg %a) nounwind {
define i64 @test3(i64 inreg %a) nounwind {
%b = add i64 %a, 128
ret i64 %b
; X32: addl $128, %eax
; X64: subq $-128,
}
@ -38,7 +38,7 @@ normal:
overflow:
ret i1 false
; X32-LABEL: test4:
; X32: addl
; X32-NEXT: jo
@ -82,11 +82,11 @@ define i64 @test6(i64 %A, i32 %B) nounwind {
ret i64 %tmp5
; X32-LABEL: test6:
; X32: movl 12(%esp), %edx
; X32: movl 4(%esp), %eax
; X32-NEXT: movl 12(%esp), %edx
; X32-NEXT: addl 8(%esp), %edx
; X32-NEXT: movl 4(%esp), %eax
; X32-NEXT: ret
; X64-LABEL: test6:
; X64: shlq $32, %r[[A1]]
; X64: leaq (%r[[A1]],%r[[A0]]), %rax

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s
; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux -enable-misched=false | FileCheck %s
declare void @bar(<2 x i64>* %n)

View File

@ -240,14 +240,14 @@ define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
; CHECK-NEXT: vpsllq $32, %xmm
; CHECK-NEXT: vpaddq %xmm
; CHECK-NEXT: vpsrlq $32, %xmm
; CHECK-NEXT: vpmuludq %xmm
@ -269,4 +269,3 @@ define <4 x float> @int_sqrt_ss() {
%x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind
ret <4 x float> %x2
}

View File

@ -32,7 +32,7 @@ declare i32 @func_int(i32, i32)
define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
%y = alloca <16 x float>, align 16
%x = fadd <16 x float> %a, %b
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
%2 = load <16 x float>* %y, align 16
%3 = fadd <16 x float> %2, %1
ret <16 x float> %3
@ -43,21 +43,21 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind {
; preserved ymm6-ymm15
; WIN64: testf16_regs
; WIN64: call
; WIN64: vaddps {{%ymm[6-7]}}, %ymm0, %ymm0
; WIN64: vaddps {{%ymm[6-7]}}, %ymm1, %ymm1
; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
; WIN64: ret
; preserved ymm8-ymm15
; X64: testf16_regs
; X64: call
; X64: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0
; X64: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1
; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}}
; X64: ret
define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
%y = alloca <16 x float>, align 16
%x = fadd <16 x float> %a, %b
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
%1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y)
%2 = load <16 x float>* %y, align 16
%3 = fadd <16 x float> %1, %b
%4 = fadd <16 x float> %2, %3
@ -166,4 +166,3 @@ entry:
%8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
ret <8 x float> %8
}

View File

@ -81,7 +81,7 @@ entry:
define i32 @test9(<4 x i32> %a) nounwind {
; CHECK: test9
; CHECK: vpextrd
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4>
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> <i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 undef, i32 4>
%r = extractelement <8 x i32> %b, i32 2
; CHECK: ret
ret i32 %r
@ -251,6 +251,8 @@ define <8 x float> @test19(<8 x float> %A, <8 x float>%B) nounwind {
; CHECK: swap8doubles
; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}}
; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}}
; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}}
; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}}
; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi)

View File

@ -167,8 +167,8 @@ define i32 @float_to_int(float %x) {
}
; CHECK-LABEL: uitof64
; CHECK: vextracti64x4
; CHECK: vcvtudq2pd
; CHECK: vextracti64x4
; CHECK: vcvtudq2pd
; CHECK: ret
define <16 x double> @uitof64(<16 x i32> %a) nounwind {

View File

@ -27,8 +27,8 @@ define i16 @mand16(i16 %x, i16 %y) {
%md = xor <16 x i1> %ma, %mb
%me = or <16 x i1> %mc, %md
%ret = bitcast <16 x i1> %me to i16
; CHECK: kxorw
; CHECK: kandw
; CHECK: kxorw
; CHECK: korw
ret i16 %ret
}
@ -55,4 +55,3 @@ define i8 @shuf_test1(i16 %v) nounwind {
%mask1 = bitcast <8 x i1> %mask to i8
ret i8 %mask1
}

View File

@ -1,7 +1,7 @@
; Without list-burr scheduling we may not see the difference in codegen here.
; Use a subtarget that has post-RA scheduling enabled because the anti-dependency
; breaker requires liveness information to be kept.
; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
; RUN: llc < %s -march=x86-64 -mcpu=atom -enable-misched=false -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t
; RUN: grep "%xmm0" %t | count 14
; RUN: not grep "%xmm1" %t
; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical > %t

View File

@ -38,7 +38,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @test2b(i32 %x, i32 %n) nounwind {
entry:
; CHECK: test2b
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jb
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
@ -56,7 +56,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @atest2(i32 %x, i32 %n) nounwind {
entry:
; CHECK: atest2
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jb
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
@ -74,7 +74,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @atest2b(i32 %x, i32 %n) nounwind {
entry:
; CHECK: atest2b
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
%tmp4 = icmp eq i32 %tmp3, 0 ; <i1> [#uses=1]
@ -91,7 +91,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @test3(i32 %x, i32 %n) nounwind {
entry:
; CHECK: test3
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jb
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1]
@ -109,7 +109,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @test3b(i32 %x, i32 %n) nounwind {
entry:
; CHECK: test3b
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jb
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %x, %tmp29
@ -127,7 +127,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @testne2(i32 %x, i32 %n) nounwind {
entry:
; CHECK: testne2
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
@ -145,7 +145,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @testne2b(i32 %x, i32 %n) nounwind {
entry:
; CHECK: testne2b
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
@ -163,7 +163,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @atestne2(i32 %x, i32 %n) nounwind {
entry:
; CHECK: atestne2
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
@ -181,7 +181,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @atestne2b(i32 %x, i32 %n) nounwind {
entry:
; CHECK: atestne2b
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
@ -199,7 +199,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @testne3(i32 %x, i32 %n) nounwind {
entry:
; CHECK: testne3
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1]
@ -217,7 +217,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @testne3b(i32 %x, i32 %n) nounwind {
entry:
; CHECK: testne3b
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %x, %tmp29
@ -235,7 +235,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @query2(i32 %x, i32 %n) nounwind {
entry:
; CHECK: query2
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
@ -253,7 +253,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @query2b(i32 %x, i32 %n) nounwind {
entry:
; CHECK: query2b
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
@ -271,7 +271,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @aquery2(i32 %x, i32 %n) nounwind {
entry:
; CHECK: aquery2
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
@ -289,7 +289,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @aquery2b(i32 %x, i32 %n) nounwind {
entry:
; CHECK: aquery2b
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
@ -307,7 +307,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @query3(i32 %x, i32 %n) nounwind {
entry:
; CHECK: query3
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1]
@ -325,7 +325,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @query3b(i32 %x, i32 %n) nounwind {
entry:
; CHECK: query3b
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %x, %tmp29
@ -343,7 +343,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @query3x(i32 %x, i32 %n) nounwind {
entry:
; CHECK: query3x
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1]
@ -361,7 +361,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @query3bx(i32 %x, i32 %n) nounwind {
entry:
; CHECK: query3bx
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jae
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %x, %tmp29
@ -379,7 +379,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @queryne2(i32 %x, i32 %n) nounwind {
entry:
; CHECK: queryne2
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jb
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
@ -397,7 +397,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @queryne2b(i32 %x, i32 %n) nounwind {
entry:
; CHECK: queryne2b
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jb
%tmp29 = lshr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
@ -415,7 +415,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @aqueryne2(i32 %x, i32 %n) nounwind {
entry:
; CHECK: aqueryne2
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jb
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, 1 ; <i32> [#uses=1]
@ -433,7 +433,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @aqueryne2b(i32 %x, i32 %n) nounwind {
entry:
; CHECK: aqueryne2b
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jb
%tmp29 = ashr i32 %x, %n ; <i32> [#uses=1]
%tmp3 = and i32 1, %tmp29
@ -451,7 +451,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @queryne3(i32 %x, i32 %n) nounwind {
entry:
; CHECK: queryne3
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jb
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1]
@ -469,7 +469,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @queryne3b(i32 %x, i32 %n) nounwind {
entry:
; CHECK: queryne3b
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jb
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %x, %tmp29
@ -487,7 +487,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @queryne3x(i32 %x, i32 %n) nounwind {
entry:
; CHECK: queryne3x
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jb
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %tmp29, %x ; <i32> [#uses=1]
@ -505,7 +505,7 @@ UnifiedReturnBlock: ; preds = %entry
define void @queryne3bx(i32 %x, i32 %n) nounwind {
entry:
; CHECK: queryne3bx
; CHECK: btl %eax, %ecx
; CHECK: btl %e{{..}}, %e{{..}}
; CHECK: jb
%tmp29 = shl i32 1, %n ; <i32> [#uses=1]
%tmp3 = and i32 %x, %tmp29

View File

@ -7,14 +7,14 @@
define i32 @main() nounwind {
entry:
; CHECK-LABEL: main:
; CHECK: movl $1, (%esp)
; CHECK: leal 16(%esp), %edi
; CHECK: leal 160(%esp), %esi
; CHECK: rep;movsl
; CHECK: movl $1, (%esp)
%s = alloca %struct.S ; <%struct.S*> [#uses=2]
%tmp15 = getelementptr %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1]
store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16
call void @t( i32 1, %struct.S* byval %s ) nounwind
call void @t( i32 1, %struct.S* byval %s ) nounwind
ret i32 0
}

View File

@ -3,8 +3,8 @@
;CHECK-LABEL: cftx020:
;CHECK: vmovsd (%rdi), %xmm{{.*}}
;CHECK: vmovsd 16(%rdi), %xmm{{.*}}
;CHECK: vmovhpd 8(%rdi), %xmm{{.*}}
;CHECK: vmovsd 24(%rdi), %xmm{{.*}}
;CHECK: vmovhpd 8(%rdi), %xmm{{.*}}
;CHECK: vmovupd %xmm{{.*}}, (%rdi)
;CHECK: vmovupd %xmm{{.*}}, 16(%rdi)
;CHECK: ret
@ -35,4 +35,3 @@ entry:
store <2 x double> %14, <2 x double>* %15, align 8
ret void
}

View File

@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
entry:
; CHECK-LABEL: test1:
; CHECK: movl $12, %eax
; CHECK-NEXT: btl
; CHECK: btl
; CHECK-NEXT: movl $12, %eax
; CHECK-NEXT: cmovael (%rcx), %eax
; CHECK-NEXT: ret
@ -19,8 +19,8 @@ entry:
define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone {
entry:
; CHECK-LABEL: test2:
; CHECK: movl $12, %eax
; CHECK-NEXT: btl
; CHECK: btl
; CHECK-NEXT: movl $12, %eax
; CHECK-NEXT: cmovbl (%rcx), %eax
; CHECK-NEXT: ret
@ -92,7 +92,7 @@ bb.i.i.i: ; preds = %entry
; CHECK: testb
; CHECK-NOT: xor
; CHECK: setne
; CHECK-NEXT: testb
; CHECK: testb
func_4.exit.i: ; preds = %bb.i.i.i, %entry
%.not.i = xor i1 %2, true ; <i1> [#uses=1]

View File

@ -38,10 +38,10 @@ define i32 @t2(i32 %X, i32 %Y) nounwind {
define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind {
entry:
; DARWIN-LABEL: t3:
; DARWIN: shll $16
; DARWIN: shlq $32, %rcx
; DARWIN-NEXT: orq %rcx, %rax
; DARWIN-NEXT: shll $8
; DARWIN-NOT: leaq
; DARWIN: orq %rcx, %rax
%tmp21 = zext i32 %lb to i64
%tmp23 = zext i32 %ub to i64
%tmp24 = shl i64 %tmp23, 32

View File

@ -40,7 +40,7 @@ entry:
; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx
; ATOM: _t:
; ATOM: movl L_LotsStuff$non_lazy_ptr, %ecx
; ATOM: movl $0, %eax
; ATOM: movl L_LotsStuff$non_lazy_ptr, %e{{..}}
; ATOM: movl $0, %e{{..}}
}

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -post-RA-scheduler=false | FileCheck %s
; CHECK: movsd %xmm0, 8(%esp)
; CHECK: xorl %ecx, %ecx
; CHECK: movsd %xmm{{[0-9]}}, 8(%esp)
; CHECK: xorl %eax, %eax
@d = external global double ; <double*> [#uses=1]
@c = external global double ; <double*> [#uses=1]

View File

@ -38,10 +38,10 @@ L:
store i16 %A, i16* %Q
ret i32 %D
; CHECK-LABEL: test2:
; CHECK: movl 4(%esp), %eax
; CHECK-NEXT: movzwl (%eax), %ecx
; CHECK-NEXT: movzwl (%eax), %e{{..}}
}

View File

@ -54,22 +54,27 @@ forbody: ; preds = %forcond
%mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2]
%mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1]
%cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1]
%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
%bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1]
%andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1]
call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
%tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
%bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2]
%andps.i14 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %bitcast6.i13 ; <<4 x i32>> [#uses=1]
%not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
%andnps.i17 = add <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %not.i16 ; <<4 x i32>> [#uses=1]
%orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1]
%bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1]
%tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
%bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1]
%andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1]
%bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1]
%not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
%andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1]
call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind
%orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1]
%bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1]
%tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1]
%bitcast6.i = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=2]
%andps.i = and <4 x i32> zeroinitializer, %bitcast6.i ; <<4 x i32>> [#uses=1]
%bitcast11.i = bitcast <4 x float> %tmp84 to <4 x i32> ; <<4 x i32>> [#uses=1]

View File

@ -4,7 +4,7 @@
define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind {
; ATOM: foo
; ATOM: addl
; ATOM: leal
; ATOM: addl
; ATOM: leal
; CHECK: foo

View File

@ -1,21 +1,35 @@
; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s
; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s
; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN
; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN
; rdar://7398554
; When doing vector gather-scatter index calculation with 32-bit indices,
; bounce the vector off of cache rather than shuffling each individual
; element out of the index vector.
; CHECK: andps ([[H:%rdx|%r8]]), %xmm0
; CHECK: movaps %xmm0, {{(-24)?}}(%rsp)
; CHECK: movslq {{(-24)?}}(%rsp), %rax
; CHECK: movsd ([[P:%rdi|%rcx]],%rax,8), %xmm0
; CHECK: movslq {{-20|4}}(%rsp), %rax
; CHECK: movhpd ([[P]],%rax,8), %xmm0
; CHECK: movslq {{-16|8}}(%rsp), %rax
; CHECK: movsd ([[P]],%rax,8), %xmm1
; CHECK: movslq {{-12|12}}(%rsp), %rax
; CHECK: movhpd ([[P]],%rax,8), %xmm1
; CHECK: foo:
; LIN: movaps (%rsi), %xmm0
; LIN: andps (%rdx), %xmm0
; LIN: movaps %xmm0, -24(%rsp)
; LIN: movslq -24(%rsp), %[[REG1:r.+]]
; LIN: movslq -20(%rsp), %[[REG2:r.+]]
; LIN: movslq -16(%rsp), %[[REG3:r.+]]
; LIN: movslq -12(%rsp), %[[REG4:r.+]]
; LIN: movsd (%rdi,%[[REG1]],8), %xmm0
; LIN: movhpd (%rdi,%[[REG2]],8), %xmm0
; LIN: movsd (%rdi,%[[REG3]],8), %xmm1
; LIN: movhpd (%rdi,%[[REG4]],8), %xmm1
; WIN: movaps (%rdx), %xmm0
; WIN: andps (%r8), %xmm0
; WIN: movaps %xmm0, (%rsp)
; WIN: movslq (%rsp), %[[REG1:r.+]]
; WIN: movslq 4(%rsp), %[[REG2:r.+]]
; WIN: movslq 8(%rsp), %[[REG3:r.+]]
; WIN: movslq 12(%rsp), %[[REG4:r.+]]
; WIN: movsd (%rcx,%[[REG1]],8), %xmm0
; WIN: movhpd (%rcx,%[[REG2]],8), %xmm0
; WIN: movsd (%rcx,%[[REG3]],8), %xmm1
; WIN: movhpd (%rcx,%[[REG4]],8), %xmm1
define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind {
%a = load <4 x i32>* %i

View File

@ -28,10 +28,10 @@ entry:
define cc 10 void @foo() nounwind {
entry:
; CHECK: movl base, %ebx
; CHECK-NEXT: movl sp, %ebp
; CHECK: movl r1, %esi
; CHECK-NEXT: movl hp, %edi
; CHECK-NEXT: movl r1, %esi
; CHECK-NEXT: movl sp, %ebp
; CHECK-NEXT: movl base, %ebx
%0 = load i32* @r1
%1 = load i32* @hp
%2 = load i32* @sp
@ -42,4 +42,3 @@ entry:
}
declare cc 10 void @bar(i32, i32, i32, i32)

View File

@ -41,22 +41,22 @@ entry:
define cc 10 void @foo() nounwind {
entry:
; CHECK: movq base(%rip), %r13
; CHECK-NEXT: movq sp(%rip), %rbp
; CHECK-NEXT: movq hp(%rip), %r12
; CHECK-NEXT: movq r1(%rip), %rbx
; CHECK-NEXT: movq r2(%rip), %r14
; CHECK-NEXT: movq r3(%rip), %rsi
; CHECK-NEXT: movq r4(%rip), %rdi
; CHECK-NEXT: movq r5(%rip), %r8
; CHECK-NEXT: movq r6(%rip), %r9
; CHECK-NEXT: movq splim(%rip), %r15
; CHECK-NEXT: movss f1(%rip), %xmm1
; CHECK-NEXT: movss f2(%rip), %xmm2
; CHECK-NEXT: movss f3(%rip), %xmm3
; CHECK-NEXT: movss f4(%rip), %xmm4
; CHECK: movsd d2(%rip), %xmm6
; CHECK-NEXT: movsd d1(%rip), %xmm5
; CHECK-NEXT: movsd d2(%rip), %xmm6
; CHECK-NEXT: movss f4(%rip), %xmm4
; CHECK-NEXT: movss f3(%rip), %xmm3
; CHECK-NEXT: movss f2(%rip), %xmm2
; CHECK-NEXT: movss f1(%rip), %xmm1
; CHECK-NEXT: movq splim(%rip), %r15
; CHECK-NEXT: movq r6(%rip), %r9
; CHECK-NEXT: movq r5(%rip), %r8
; CHECK-NEXT: movq r4(%rip), %rdi
; CHECK-NEXT: movq r3(%rip), %rsi
; CHECK-NEXT: movq r2(%rip), %r14
; CHECK-NEXT: movq r1(%rip), %rbx
; CHECK-NEXT: movq hp(%rip), %r12
; CHECK-NEXT: movq sp(%rip), %rbp
; CHECK-NEXT: movq base(%rip), %r13
%0 = load double* @d2
%1 = load double* @d1
%2 = load float* @f4
@ -83,4 +83,3 @@ entry:
declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64,
float, float, float, float, double, double)

View File

@ -49,10 +49,10 @@ entry:
store i32 %arg1, i32* %arg1_var
store i32 %arg2, i32* %arg2_var
; CHECK: movl 4(%esp), %edx
; CHECK-NEXT: movl 8(%esp), %eax
; CHECK: movl 16(%esp), %esi
; CHECK-NEXT: movl 12(%esp), %ebp
; CHECK-NEXT: movl 16(%esp), %esi
; CHECK-NEXT: movl 8(%esp), %eax
; CHECK-NEXT: movl 4(%esp), %edx
%0 = load i32* %hp_var
%1 = load i32* %p_var
%2 = load i32* %arg0_var

View File

@ -5,10 +5,10 @@
define void @zap(i64 %a, i64 %b) nounwind {
entry:
; CHECK: movq %rsi, %rax
; CHECK-NEXT: movq %rdi, %rsi
; CHECK-NEXT: movq %rax, %rdx
; CHECK-NEXT: movl $8, %ecx
; CHECK-NEXT: movl $9, %r8d
; CHECK-NEXT: movq %rdi, %rsi
; CHECK-NEXT: movq %rax, %rdx
; CHECK-NEXT: callq addfour
%0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
%res = extractvalue {i64, i64, i64} %0, 2
@ -57,11 +57,11 @@ entry:
store i64 %arg2, i64* %arg2_var
store i64 %arg3, i64* %arg3_var
; CHECK: movq 8(%rsp), %rcx
; CHECK-NEXT: movq 16(%rsp), %rdx
; CHECK-NEXT: movq 24(%rsp), %rsi
; CHECK: movq 40(%rsp), %r15
; CHECK-NEXT: movq 32(%rsp), %rbp
; CHECK-NEXT: movq 40(%rsp), %r15
; CHECK-NEXT: movq 24(%rsp), %rsi
; CHECK-NEXT: movq 16(%rsp), %rdx
; CHECK-NEXT: movq 8(%rsp), %rcx
%0 = load i64* %hp_var
%1 = load i64* %p_var
%2 = load i64* %arg0_var

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86-64 | grep lea | count 12
; RUN: llc < %s -march=x86-64 | grep lea | count 13
; This testcase was written to demonstrate an instruction-selection problem,
; however it also happens to expose a limitation in the DAGCombiner's
@ -44,4 +44,3 @@ entry:
store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7)
ret void
}

View File

@ -28,8 +28,7 @@ bb.nph:
bb2:
ret i32 %x_offs
; CHECK-LABEL: test2:
; CHECK: movl %e[[A0]], %eax
; CHECK: addl $-5, %eax
; CHECK: leal -5(%r[[A0:..]]), %eax
; CHECK: andl $-4, %eax
; CHECK: negl %eax
; CHECK: leal -4(%r[[A0]],%rax), %eax

View File

@ -17,14 +17,14 @@
; High slice starts at 4 (base + 4-bytes) and is 4-bytes aligned.
;
; STRESS-LABEL: t1:
; Load out[out_start + 8].imm, this is base + 8 * 8 + 4.
; STRESS: vmovss 68([[BASE:[^)]+]]), [[OUT_Imm:%xmm[0-9]+]]
; Add high slice: out[out_start].imm, this is base + 4.
; STRESS-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]]
; Load out[out_start + 8].real, this is base + 8 * 8 + 0.
; STRESS-NEXT: vmovss 64([[BASE]]), [[OUT_Real:%xmm[0-9]+]]
; STRESS: vmovss 64([[BASE:[^(]+]]), [[OUT_Real:%xmm[0-9]+]]
; Add low slice: out[out_start].real, this is base + 0.
; STRESS-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]]
; Load out[out_start + 8].imm, this is base + 8 * 8 + 4.
; STRESS-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]]
; Add high slice: out[out_start].imm, this is base + 4.
; STRESS-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]]
; Swap Imm and Real.
; STRESS-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]]
; Put the results back into out[out_start].
@ -32,14 +32,14 @@
;
; Same for REGULAR, we eliminate register bank copy with each slices.
; REGULAR-LABEL: t1:
; Load out[out_start + 8].imm, this is base + 8 * 8 + 4.
; REGULAR: vmovss 68([[BASE:[^)]+]]), [[OUT_Imm:%xmm[0-9]+]]
; Add high slice: out[out_start].imm, this is base + 4.
; REGULAR-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]]
; Load out[out_start + 8].real, this is base + 8 * 8 + 0.
; REGULAR-NEXT: vmovss 64([[BASE]]), [[OUT_Real:%xmm[0-9]+]]
; REGULAR: vmovss 64([[BASE:[^)]+]]), [[OUT_Real:%xmm[0-9]+]]
; Add low slice: out[out_start].real, this is base + 0.
; REGULAR-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]]
; Load out[out_start + 8].imm, this is base + 8 * 8 + 4.
; REGULAR-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]]
; Add high slice: out[out_start].imm, this is base + 4.
; REGULAR-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]]
; Swap Imm and Real.
; REGULAR-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]]
; Put the results back into out[out_start].
@ -137,4 +137,3 @@ define i32 @t3(%class.Complex* nocapture %out, i64 %out_start) {
%res = add i32 %slice32_lowhigh, %tmpres
ret i32 %res
}

View File

@ -2,12 +2,12 @@
; RUN: llc -mtriple=x86_64-darwin -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
; CHECK-LABEL: t:
; CHECK: decq
; CHECK-NEXT: movl (%r9,%rax,4), %eax
; CHECK: movl (%r9,%rax,4), %e{{..}}
; CHECK-NEXT: decq
; CHECK-NEXT: jne
; ATOM-LABEL: t:
; ATOM: movl (%r9,%r{{.+}},4), %eax
; ATOM: movl (%r9,%r{{.+}},4), %e{{..}}
; ATOM-NEXT: decq
; ATOM-NEXT: jne

View File

@ -1,15 +1,13 @@
; RUN: llc < %s -mcpu=generic -march=x86-64 > %t
; RUN: not grep and %t
; RUN: not grep movz %t
; RUN: not grep sar %t
; RUN: not grep shl %t
; RUN: grep add %t | count 5
; RUN: grep inc %t | count 2
; RUN: grep lea %t | count 3
; RUN: llc < %s -mcpu=generic -march=x86-64 | FileCheck %s
; Optimize away zext-inreg and sext-inreg on the loop induction
; variable using trip-count information.
; CHECK-LABEL: count_up
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: inc
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_up(double* %d, i64 %n) nounwind {
entry:
br label %loop
@ -38,6 +36,11 @@ return:
ret void
}
; CHECK-LABEL: count_down
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: addq
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_down(double* %d, i64 %n) nounwind {
entry:
br label %loop
@ -66,6 +69,11 @@ return:
ret void
}
; CHECK-LABEL: count_up_signed
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: inc
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_up_signed(double* %d, i64 %n) nounwind {
entry:
br label %loop
@ -96,6 +104,11 @@ return:
ret void
}
; CHECK-LABEL: count_down_signed
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: addq
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @count_down_signed(double* %d, i64 %n) nounwind {
entry:
br label %loop
@ -126,6 +139,11 @@ return:
ret void
}
; CHECK-LABEL: another_count_up
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: addq
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @another_count_up(double* %d, i64 %n) nounwind {
entry:
br label %loop
@ -154,6 +172,11 @@ return:
ret void
}
; CHECK-LABEL: another_count_down
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: decq
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @another_count_down(double* %d, i64 %n) nounwind {
entry:
br label %loop
@ -182,6 +205,11 @@ return:
ret void
}
; CHECK-LABEL: another_count_up_signed
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: addq
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @another_count_up_signed(double* %d, i64 %n) nounwind {
entry:
br label %loop
@ -212,6 +240,11 @@ return:
ret void
}
; CHECK-LABEL: another_count_down_signed
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: decq
; CHECK-NOT: {{and|movz|sar|shl}}
; CHECK: jne
define void @another_count_down_signed(double* %d, i64 %n) nounwind {
entry:
br label %loop

View File

@ -56,15 +56,15 @@ entry:
define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
entry:
; SSE2-Darwin-LABEL: t2:
; SSE2-Darwin: movaps (%eax), %xmm0
; SSE2-Darwin: movaps (%ecx), %xmm0
; SSE2-Darwin: movaps %xmm0, (%eax)
; SSE2-Mingw32-LABEL: t2:
; SSE2-Mingw32: movaps (%eax), %xmm0
; SSE2-Mingw32: movaps (%ecx), %xmm0
; SSE2-Mingw32: movaps %xmm0, (%eax)
; SSE1-LABEL: t2:
; SSE1: movaps (%eax), %xmm0
; SSE1: movaps (%ecx), %xmm0
; SSE1: movaps %xmm0, (%eax)
; NOSSE-LABEL: t2:
@ -91,14 +91,14 @@ entry:
define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp {
entry:
; SSE2-Darwin-LABEL: t3:
; SSE2-Darwin: movsd (%eax), %xmm0
; SSE2-Darwin: movsd 8(%eax), %xmm1
; SSE2-Darwin: movsd (%ecx), %xmm0
; SSE2-Darwin: movsd 8(%ecx), %xmm1
; SSE2-Darwin: movsd %xmm1, 8(%eax)
; SSE2-Darwin: movsd %xmm0, (%eax)
; SSE2-Mingw32-LABEL: t3:
; SSE2-Mingw32: movsd (%eax), %xmm0
; SSE2-Mingw32: movsd 8(%eax), %xmm1
; SSE2-Mingw32: movsd (%ecx), %xmm0
; SSE2-Mingw32: movsd 8(%ecx), %xmm1
; SSE2-Mingw32: movsd %xmm1, 8(%eax)
; SSE2-Mingw32: movsd %xmm0, (%eax)

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -mattr=sse4.1 -mcpu=nehalem -stack-alignment=16 > %t
; RUN: grep pmul %t | count 12
; RUN: grep mov %t | count 11
; RUN: grep mov %t | count 14
define <4 x i32> @a(<4 x i32> %i) nounwind {
%A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 >

View File

@ -19,7 +19,14 @@ return:
ret i32 %retval.0
}
; We were miscompiling this and using %ax instead of %cx in the movw.
; CHECK: movswl %cx, %ecx
; CHECK: movw %cx, (%rsi)
; CHECK: movslq %ecx, %rcx
; We were miscompiling this and using %ax instead of %cx in the movw
; in the following sequence:
; movswl %cx, %ecx
; movw %cx, (%rsi)
; movslq %ecx, %rcx
;
; We can't produce the above sequence without special SD-level
; heuristics. Now we produce this:
; CHECK: movw %ax, (%rsi)
; CHECK: cwtl
; CHECK: cltq

View File

@ -57,11 +57,10 @@ entry:
%tmp22 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp16, double %tmp1920 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
%tmp30 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp22 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0]
; reload:
; CHECK: fld
; CHECK: fstps
; CHECK: ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
%tmp34 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
%tmp3940 = fpext float %tmp1314 to double ; <double> [#uses=1]
; CHECK: fld
; CHECK: fstpl
; CHECK: ZNSolsEd
%tmp42 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp34, double %tmp3940 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx -enable-misched=false | FileCheck %s
; CHECK-LABEL: main:
; CHECK: pushl %esi

View File

@ -1,5 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-macosx -debug-only=pre-RA-sched \
; RUN: 2>&1 | FileCheck %s
; RUN-disabled: llc < %s -mtriple=x86_64-apple-macosx -pre-RA-sched=ilp -debug-only=pre-RA-sched \
; RUN-disabled: 2>&1 | FileCheck %s
; RUN: true
; REQUIRES: asserts
;
; rdar:13279013: pre-RA-sched should not check all interferences and

View File

@ -11,10 +11,10 @@ define i32 @_rdrand16_step(i16* %random_val) {
ret i32 %isvalid
; CHECK-LABEL: _rdrand16_step:
; CHECK: rdrandw %ax
; CHECK: movw %ax, (%r[[A0:di|cx]])
; CHECK: movzwl %ax, %ecx
; CHECK: movl $1, %eax
; CHECK: cmovael %ecx, %eax
; CHECK: movw %cx, (%r[[A0:di|cx]])
; CHECK: ret
}
@ -26,9 +26,9 @@ define i32 @_rdrand32_step(i32* %random_val) {
ret i32 %isvalid
; CHECK-LABEL: _rdrand32_step:
; CHECK: rdrandl %e[[T0:[a-z]+]]
; CHECK: movl %e[[T0]], (%r[[A0]])
; CHECK: movl $1, %eax
; CHECK: cmovael %e[[T0]], %eax
; CHECK: movl %e[[T0]], (%r[[A0]])
; CHECK: ret
}
@ -40,9 +40,9 @@ define i32 @_rdrand64_step(i64* %random_val) {
ret i32 %isvalid
; CHECK-LABEL: _rdrand64_step:
; CHECK: rdrandq %r[[T1:[a-z]+]]
; CHECK: movq %r[[T1]], (%r[[A0]])
; CHECK: movl $1, %eax
; CHECK: cmovael %e[[T1]], %eax
; CHECK: movq %r[[T1]], (%r[[A0]])
; CHECK: ret
}

View File

@ -12,10 +12,10 @@ define i32 @_rdseed16_step(i16* %random_val) {
ret i32 %isvalid
; CHECK-LABEL: _rdseed16_step:
; CHECK: rdseedw %ax
; CHECK: movw %ax, (%r[[A0:di|cx]])
; CHECK: movzwl %ax, %ecx
; CHECK: movl $1, %eax
; CHECK: cmovael %ecx, %eax
; CHECK: movw %cx, (%r[[A0:di|cx]])
; CHECK: ret
}
@ -27,9 +27,9 @@ define i32 @_rdseed32_step(i32* %random_val) {
ret i32 %isvalid
; CHECK-LABEL: _rdseed32_step:
; CHECK: rdseedl %e[[T0:[a-z]+]]
; CHECK: movl %e[[T0]], (%r[[A0]])
; CHECK: movl $1, %eax
; CHECK: cmovael %e[[T0]], %eax
; CHECK: movl %e[[T0]], (%r[[A0]])
; CHECK: ret
}
@ -41,8 +41,8 @@ define i32 @_rdseed64_step(i64* %random_val) {
ret i32 %isvalid
; CHECK-LABEL: _rdseed64_step:
; CHECK: rdseedq %r[[T1:[a-z]+]]
; CHECK: movq %r[[T1]], (%r[[A0]])
; CHECK: movl $1, %eax
; CHECK: cmovael %e[[T1]], %eax
; CHECK: movq %r[[T1]], (%r[[A0]])
; CHECK: ret
}

View File

@ -31,7 +31,7 @@ false:
; X32-NEXT: ret
; X32: movl %esp, %eax
; X32-NEXT: subl %ecx, %eax
; X32: subl %ecx, %eax
; X32-NEXT: cmpl %eax, %gs:48
; X32: movl %eax, %esp
@ -52,7 +52,7 @@ false:
; X64-NEXT: ret
; X64: movq %rsp, %[[RDI:rdi|rax]]
; X64-NEXT: subq %{{.*}}, %[[RDI]]
; X64: subq %{{.*}}, %[[RDI]]
; X64-NEXT: cmpq %[[RDI]], %fs:112
; X64: movq %[[RDI]], %rsp

View File

@ -34,12 +34,12 @@ bb90: ; preds = %bb84, %bb72
bb91: ; preds = %bb84
ret i32 0
; CHECK-LABEL: test2:
; CHECK: movnew
; CHECK: movswl
; CHECK: cmovnew
; CHECK: cwtl
; ATOM-LABEL: test2:
; ATOM: movnew
; ATOM: movswl
; ATOM: cmovnew
; ATOM: cwtl
}
declare i1 @return_false()
@ -256,8 +256,8 @@ entry:
%call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone
ret i8* %call
; CHECK-LABEL: test12:
; CHECK: movq $-1, %[[R:r..]]
; CHECK: mulq
; CHECK: movq $-1, %[[R:r..]]
; CHECK: cmovnoq %rax, %[[R]]
; CHECK: jmp __Znam

View File

@ -30,10 +30,11 @@ entry:
%x = load i32* %p
%shl = shl i32 %x, %shamt
; BMI2: shl32p
; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}}
; Source order scheduling prevents folding, rdar:14208996.
; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}}
; BMI2: ret
; BMI264: shl32p
; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}}
; BMI264: ret
ret i32 %shl
}
@ -74,7 +75,7 @@ entry:
%x = load i64* %p
%shl = shl i64 %x, %shamt
; BMI264: shl64p
; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}}
; BMI264: ret
ret i64 %shl
}
@ -106,10 +107,11 @@ entry:
%x = load i32* %p
%shl = lshr i32 %x, %shamt
; BMI2: lshr32p
; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}}
; Source order scheduling prevents folding, rdar:14208996.
; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}}
; BMI2: ret
; BMI264: lshr32p
; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}}
; BMI264: ret
ret i32 %shl
}
@ -128,7 +130,7 @@ entry:
%x = load i64* %p
%shl = lshr i64 %x, %shamt
; BMI264: lshr64p
; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}}
; BMI264: ret
ret i64 %shl
}
@ -150,10 +152,11 @@ entry:
%x = load i32* %p
%shl = ashr i32 %x, %shamt
; BMI2: ashr32p
; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}}
; Source order scheduling prevents folding, rdar:14208996.
; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}}
; BMI2: ret
; BMI264: ashr32p
; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}}
; BMI264: ret
ret i32 %shl
}
@ -172,7 +175,7 @@ entry:
%x = load i64* %p
%shl = ashr i64 %x, %shamt
; BMI264: ashr64p
; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}}
; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}}
; BMI264: ret
ret i64 %shl
}

View File

@ -26,11 +26,10 @@ define double @foo(double %x, double %y, i1 %c) nounwind {
; CHECK-LABEL: split:
; CHECK-NEXT: testb $1, %dil
; CHECK-NEXT: jne
; CHECK-NEXT: movaps
; CHECK-NEXT: ret
; CHECK-NEXT: je
; CHECK: divsd
; CHECK-NEXT: ret
; CHECK: movaps
; CHECK: ret
define double @split(double %x, double %y, i1 %c) nounwind {
%a = fdiv double %x, 3.2
%z = select i1 %c, double %a, double %y
@ -65,7 +64,7 @@ return:
; Sink instructions with dead EFLAGS defs.
; FIXME: Unfail the zzz test if we can correctly mark pregs with the kill flag.
;
;
; See <rdar://problem/8030636>. This test isn't valid after we made machine
; sinking more conservative about sinking instructions that define a preg into a
; block when we don't know if the preg is killed within the current block.

View File

@ -7,7 +7,7 @@ define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 >
store <2 x double> %tmp9, <2 x double>* %r, align 16
ret void
; CHECK-LABEL: test1:
; CHECK: movl 8(%esp), %eax
; CHECK-NEXT: movapd (%eax), %xmm0
@ -23,12 +23,12 @@ define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind {
%tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 >
store <2 x double> %tmp9, <2 x double>* %r, align 16
ret void
; CHECK-LABEL: test2:
; CHECK: movl 8(%esp), %eax
; CHECK-NEXT: movapd (%eax), %xmm0
; CHECK: movl 4(%esp), %eax
; CHECK: movl 8(%esp), %ecx
; CHECK-NEXT: movapd (%ecx), %xmm0
; CHECK-NEXT: movhpd 12(%esp), %xmm0
; CHECK-NEXT: movl 4(%esp), %eax
; CHECK-NEXT: movapd %xmm0, (%eax)
; CHECK-NEXT: ret
}
@ -48,7 +48,7 @@ define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind
store <4 x float> %tmp13, <4 x float>* %res
ret void
; CHECK: @test3
; CHECK: unpcklps
; CHECK: unpcklps
}
define void @test4(<4 x float> %X, <4 x float>* %res) nounwind {
@ -85,9 +85,9 @@ define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind {
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1]
store <4 x float> %tmp2, <4 x float>* %res
ret void
; CHECK-LABEL: test6:
; CHECK: movaps (%eax), %xmm0
; CHECK: movaps (%ecx), %xmm0
; CHECK: movaps %xmm0, (%eax)
}
@ -96,7 +96,7 @@ define void @test7() nounwind {
shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1]
store <4 x float> %2, <4 x float>* null
ret void
; CHECK-LABEL: test7:
; CHECK: xorps %xmm0, %xmm0
; CHECK: movaps %xmm0, 0
@ -166,7 +166,7 @@ define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x fl
store <4 x float> %tmp11, <4 x float>* %res
ret void
; CHECK: test13
; CHECK: shufps $69, (%eax), %xmm0
; CHECK: shufps $69, (%ecx), %xmm0
; CHECK: pshufd $-40, %xmm0, %xmm0
}
@ -178,8 +178,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
%tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
ret <4 x float> %tmp27
; CHECK-LABEL: test14:
; CHECK: subps [[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]]
; CHECK: addps [[X1]], [[X0:%xmm[0-9]+]]
; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]]
; CHECK: movlhps [[X2]], [[X0]]
}
@ -221,4 +221,3 @@ entry:
%double2float.i = fptrunc <4 x double> %0 to <4 x float>
ret <4 x float> %double2float.i
}

View File

@ -12,7 +12,7 @@ entry:
%D = or i32 %C, %B
store i32 %D, i32* %a0, align 4
ret void
; X64-LABEL: test1:
; X64: movb %sil, (%rdi)
@ -34,8 +34,8 @@ entry:
; X64: movb %sil, 1(%rdi)
; X32-LABEL: test2:
; X32: movb 8(%esp), %al
; X32: movb %al, 1(%{{.*}})
; X32: movb 8(%esp), %[[REG:[abcd]l]]
; X32: movb %[[REG]], 1(%{{.*}})
}
define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@ -67,8 +67,8 @@ entry:
; X64: movw %si, 2(%rdi)
; X32-LABEL: test4:
; X32: movl 8(%esp), %eax
; X32: movw %ax, 2(%{{.*}})
; X32: movl 8(%esp), %e[[REG:[abcd]x]]
; X32: movw %[[REG]], 2(%{{.*}})
}
define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp {
@ -84,8 +84,8 @@ entry:
; X64: movw %si, 2(%rdi)
; X32-LABEL: test5:
; X32: movzwl 8(%esp), %eax
; X32: movw %ax, 2(%{{.*}})
; X32: movzwl 8(%esp), %e[[REG:[abcd]x]]
; X32: movw %[[REG]], 2(%{{.*}})
}
define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp {
@ -102,8 +102,8 @@ entry:
; X32-LABEL: test6:
; X32: movb 8(%esp), %al
; X32: movb %al, 5(%{{.*}})
; X32: movb 8(%esp), %[[REG:[abcd]l]]
; X32: movb %[[REG]], 5(%{{.*}})
}
define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind {
@ -121,8 +121,8 @@ entry:
; X32-LABEL: test7:
; X32: movb 8(%esp), %cl
; X32: movb %cl, 5(%{{.*}})
; X32: movb 8(%esp), %[[REG:[abcd]l]]
; X32: movb %[[REG]], 5(%{{.*}})
}
; PR7833

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large -enable-misched=false | FileCheck %s
declare fastcc i32 @callee(i32 %arg)
define fastcc i32 @directcall(i32 %arg) {

View File

@ -2,10 +2,10 @@
; rdar://5752025
; We want:
; CHECK: movl $42, %ecx
; CHECK-NEXT: movl 4(%esp), %eax
; CHECK-NEXT: andl $15, %eax
; CHECK-NEXT: cmovnel %ecx, %eax
; CHECK: movl 4(%esp), %ecx
; CHECK-NEXT: andl $15, %ecx
; CHECK-NEXT: movl $42, %eax
; CHECK-NEXT: cmovel %ecx, %eax
; CHECK-NEXT: ret
;
; We don't want:
@ -39,4 +39,3 @@ entry:
%retval = select i1 %tmp4, i32 %tmp2, i32 42 ; <i32> [#uses=1]
ret i32 %retval
}

View File

@ -22,7 +22,7 @@ ret_false:
ret i1 false
}
; CHECK-LABEL: test2:
; CHECK: btl %eax
; CHECK: btl
define i32 @test3(i8* %ptr) nounwind {
%val = load i8* %ptr

View File

@ -1,7 +1,7 @@
; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s
; CHECK: divps
; CHECK: divps
; CHECK: divss
; CHECK: divps
; CHECK: divps
%vec = type <9 x float>
define %vec @vecdiv( %vec %p1, %vec %p2)
@ -9,4 +9,3 @@ define %vec @vecdiv( %vec %p1, %vec %p2)
%result = fdiv %vec %p1, %p2
ret %vec %result
}

View File

@ -2,9 +2,9 @@
; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s
%vec = type <6 x float>
; CHECK: divss
; CHECK: divss
; CHECK: divps
; CHECK: divss
; CHECK: divss
; Scheduler causes a different instruction order to be produced on Intel Atom
; ATOM: divps

View File

@ -7,10 +7,10 @@ target triple = "i686-apple-cl.1.0"
define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone {
entry:
; CHECK: subps
; CHECK: mulps
; CHECK: addps
; CHECK: subps
; CHECK: mulps
; CHECK: mulps
; CHECK: addps
; CHECK: addps
%tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 > ; <<8 x float>> [#uses=1]
%sub = fsub <8 x float> %T1, %T0 ; <<8 x float>> [#uses=1]

View File

@ -54,8 +54,8 @@ entry:
define <2 x double> @t3() nounwind readonly {
bb:
; CHECK-LABEL: t3:
; CHECK: punpcklqdq %xmm1, %xmm0
; CHECK: movq (%rax), %xmm1
; CHECK: punpcklqdq %xmm2, %xmm0
; CHECK: movsd %xmm1, %xmm0
%tmp0 = load i128* null, align 1
%tmp1 = load <2 x i32>* undef, align 8
@ -72,9 +72,9 @@ bb:
define <2 x i64> @t4() nounwind readonly {
bb:
; CHECK-LABEL: t4:
; CHECK: punpcklqdq %xmm0, %xmm1
; CHECK: movq (%rax), %xmm0
; CHECK: movsd %xmm1, %xmm0
; CHECK: punpcklqdq %{{xmm.}}, %[[XMM:xmm[0-9]]]
; CHECK: movsd %[[XMM]], %xmm0
%tmp0 = load i128* null, align 1
%tmp1 = load <2 x i32>* undef, align 8
%tmp2 = bitcast i128 %tmp0 to <16 x i8>

View File

@ -1,8 +1,8 @@
; RUN: llc -march=x86 -mcpu=generic -mattr=+sse4.2 < %s | FileCheck %s
; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s
; CHECK: paddd
; CHECK: movl
; CHECK: paddd
; CHECK: movlpd
; Scheduler causes produce a different instruction order

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64
; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64
; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI
; PR8777
; PR8778
@ -52,18 +52,18 @@ entry:
%r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind
; M64: subq $48, %rsp
; M64: leaq -4096(%rbp), %r9
; M64: movq %rax, 32(%rsp)
; M64: leaq -4096(%rbp), %r9
; M64: callq bar
; W64: subq $48, %rsp
; W64: leaq -4096(%rbp), %r9
; W64: movq %rax, 32(%rsp)
; W64: leaq -4096(%rbp), %r9
; W64: callq bar
; EFI: subq $48, %rsp
; EFI: leaq -[[B0OFS]](%rbp), %r9
; EFI: movq [[R64]], 32(%rsp)
; EFI: leaq -[[B0OFS]](%rbp), %r9
; EFI: callq _bar
ret i64 %r

View File

@ -4,8 +4,8 @@
; This test checks that the operands of packed sub instructions are
; never interchanged by the "Two-Address instruction pass".
declare { i64, double } @getFirstParam()
declare { i64, double } @getSecondParam()
declare { i64, double } @getFirstParam()
declare { i64, double } @getSecondParam()
define i64 @test_psubb() {
entry:
@ -28,9 +28,10 @@ entry:
; CHECK-LABEL: test_psubb:
; CHECK: callq getFirstParam
; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
; CHECK: callq getSecondParam
; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
; CHECK: psubb [[PARAM2]], [[PARAM1]]
; CHECK: ret
@ -55,9 +56,10 @@ entry:
; CHECK-LABEL: test_psubw:
; CHECK: callq getFirstParam
; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
; CHECK: callq getSecondParam
; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
; CHECK: psubw [[PARAM2]], [[PARAM1]]
; CHECK: ret
@ -83,9 +85,10 @@ entry:
; CHECK-LABEL: test_psubd:
; CHECK: callq getFirstParam
; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
; CHECK: callq getSecondParam
; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
; CHECK: psubd [[PARAM2]], [[PARAM1]]
; CHECK: ret
@ -110,9 +113,10 @@ entry:
; CHECK-LABEL: test_psubsb:
; CHECK: callq getFirstParam
; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
; CHECK: callq getSecondParam
; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
; CHECK: psubsb [[PARAM2]], [[PARAM1]]
; CHECK: ret
@ -137,9 +141,10 @@ entry:
; CHECK-LABEL: test_psubswv:
; CHECK: callq getFirstParam
; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
; CHECK: callq getSecondParam
; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
; CHECK: psubsw [[PARAM2]], [[PARAM1]]
; CHECK: ret
@ -164,9 +169,10 @@ entry:
; CHECK-LABEL: test_psubusbv:
; CHECK: callq getFirstParam
; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
; CHECK: callq getSecondParam
; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
; CHECK: psubusb [[PARAM2]], [[PARAM1]]
; CHECK: ret
@ -191,9 +197,10 @@ entry:
; CHECK-LABEL: test_psubuswv:
; CHECK: callq getFirstParam
; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]]
; CHECK: callq getSecondParam
; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]]
; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]]
; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]]
; CHECK: psubusw [[PARAM2]], [[PARAM1]]
; CHECK: ret

View File

@ -6,8 +6,8 @@
define <4 x i32> @shl4(<4 x i32> %A) nounwind {
entry:
; CHECK: shl4
; CHECK: padd
; CHECK: pslld
; CHECK: padd
; CHECK: ret
%B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2>
%C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1>
@ -67,8 +67,8 @@ entry:
define <8 x i16> @shl8(<8 x i16> %A) nounwind {
entry:
; CHECK: shl8
; CHECK: padd
; CHECK: psllw
; CHECK: padd
; CHECK: ret
%B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
%C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s
; RUN: llc < %s -mcpu=generic -march=x86 -enable-misched=false | FileCheck %s
;; Simple case
define i32 @test1(i8 %x) nounwind readnone {
@ -10,7 +10,7 @@ define i32 @test1(i8 %x) nounwind readnone {
; CHECK: movzbl
; CHECK-NEXT: andl {{.*}}224
;; Multiple uses of %x but easily extensible.
;; Multiple uses of %x but easily extensible.
define i32 @test2(i8 %x) nounwind readnone {
%A = and i8 %x, -32
%B = zext i8 %A to i32
@ -21,8 +21,8 @@ define i32 @test2(i8 %x) nounwind readnone {
}
; CHECK: test2
; CHECK: movzbl
; CHECK: orl $63
; CHECK: andl $224
; CHECK: orl $63
declare void @use(i32, i8)

View File

@ -34,10 +34,10 @@ entry:
%tmp12 = add i64 %tmp11, 5089792279245435153
; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]]
; CHECK-NEXT: cmpl $-8608074, %e[[REGISTER_zext]]
; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_tmp:%r[a-z0-9]+]]
; CHECK: movq [[REGISTER_tmp]], [[REGISTER_sext:%r[a-z0-9]+]]
; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]]
; CHECK: cmpl $-8608074, %e[[REGISTER_zext]]
; CHECK-NOT: [[REGISTER_zext]]
; CHECK-DAG: testl %e[[REGISTER_zext]]
; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]]
%tmp13 = sub i64 %tmp12, 2138875574

View File

@ -16,7 +16,7 @@ entry:
call void @llvm.dbg.value(metadata !12, i64 0, metadata !13), !dbg !14
%tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15
%tmp3 = add i32 0, %tmp2, !dbg !15
; CHECK: ##DEBUG_VALUE: idx <- EAX{{$}}
; CHECK: ##DEBUG_VALUE: idx <- E{{..$}}
call void @llvm.dbg.value(metadata !{i32 %tmp3}, i64 0, metadata !13), !dbg
!15
%arrayidx = getelementptr i32 addrspace(1)* %ip, i32 %1, !dbg !16