From e97d8d6dde16105555aa148dada45f486b62e07f Mon Sep 17 00:00:00 2001
From: Andrew Trick
Date: Tue, 15 Oct 2013 23:33:07 +0000
Subject: [PATCH] Enable MI Sched for x86.

This changes the SelectionDAG scheduling preference to source order. Soon,
the SelectionDAG scheduler can be bypassed saving a nice chunk of compile
time.

Performance differences that result from this change are often a
consequence of register coalescing. The register coalescer is far from
perfect. Bugs can be filed for deficiencies.

On x86 SandyBridge/Haswell, the source order schedule is often preserved,
particularly for small blocks.

Register pressure is generally improved over the SD scheduler's ILP mode.
However, we are still able to handle large blocks that require latency
hiding, unlike the SD scheduler's BURR mode. MI scheduler also attempts to
discover the critical path in single-block loops and adjust heuristics
accordingly.

The MI scheduler relies on the new machine model. This is currently
unimplemented for AVX, so we may not be generating the best code yet.

Unit tests are updated so they don't depend on SD scheduling heuristics.

llvm-svn: 192750
---
 llvm/lib/Target/X86/X86Subtarget.h | 5 +-
 .../CodeGen/X86/2006-05-02-InstrSched1.ll | 6 ++-
 .../test/CodeGen/X86/2007-01-08-InstrSched.ll | 4 +-
 .../CodeGen/X86/2009-02-26-MachineLICMBug.ll | 4 +-
 .../X86/2010-02-19-TailCallRetAddrBug.ll | 6 +--
 .../X86/2010-09-17-SideEffectsInChain.ll | 8 +--
 .../CodeGen/X86/2011-10-19-LegelizeLoad.ll | 3 +-
 llvm/test/CodeGen/X86/2012-04-26-sdglue.ll | 4 +-
 llvm/test/CodeGen/X86/3addr-16bit.ll | 7 +--
 llvm/test/CodeGen/X86/StackColoring.ll | 4 +-
 llvm/test/CodeGen/X86/abi-isel.ll | 20 +++----
 llvm/test/CodeGen/X86/add.ll | 12 ++---
 .../test/CodeGen/X86/alloca-align-rounding.ll | 2 +-
 llvm/test/CodeGen/X86/avx-arith.ll | 9 ++--
 llvm/test/CodeGen/X86/avx-intel-ocl.ll | 13 +++--
 llvm/test/CodeGen/X86/avx-shuffle.ll | 4 +-
 llvm/test/CodeGen/X86/avx512-cvt.ll | 2 +-
 llvm/test/CodeGen/X86/avx512-mask-op.ll | 3 +-
 .../CodeGen/X86/break-anti-dependencies.ll | 2 +-
 llvm/test/CodeGen/X86/bt.ll | 54 +++++++++----------
 llvm/test/CodeGen/X86/byval7.ll | 4 +-
 llvm/test/CodeGen/X86/chain_order.ll | 3 +-
 llvm/test/CodeGen/X86/cmov.ll | 10 ++--
 llvm/test/CodeGen/X86/commute-two-addr.ll | 4 +-
 llvm/test/CodeGen/X86/fast-isel-mem.ll | 4 +-
 llvm/test/CodeGen/X86/fastcc.ll | 4 +-
 llvm/test/CodeGen/X86/fold-load.ll | 4 +-
 llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll | 15 ++++--
 llvm/test/CodeGen/X86/full-lsr.ll | 2 +-
 llvm/test/CodeGen/X86/gather-addresses.ll | 38 ++++++++-----
 llvm/test/CodeGen/X86/ghc-cc.ll | 7 ++-
 llvm/test/CodeGen/X86/ghc-cc64.ll | 31 ++++++-----
 llvm/test/CodeGen/X86/hipe-cc.ll | 6 +--
 llvm/test/CodeGen/X86/hipe-cc64.ll | 12 ++---
 llvm/test/CodeGen/X86/lea-recursion.ll | 3 +-
 llvm/test/CodeGen/X86/lea.ll | 3 +-
 llvm/test/CodeGen/X86/load-slice.ll | 21 ++++----
 llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll | 6 +--
 llvm/test/CodeGen/X86/masked-iv-safe.ll | 49 ++++++++++++++---
 llvm/test/CodeGen/X86/memcpy-2.ll | 14 ++---
 llvm/test/CodeGen/X86/pmul.ll | 2 +-
 llvm/test/CodeGen/X86/pr14088.ll | 15 ++++--
 llvm/test/CodeGen/X86/pr1505b.ll | 3 +-
 llvm/test/CodeGen/X86/pr16031.ll | 2 +-
 llvm/test/CodeGen/X86/pre-ra-sched.ll | 5 +-
 llvm/test/CodeGen/X86/rdrand.ll | 6 +--
 llvm/test/CodeGen/X86/rdseed.ll | 6 +--
 .../CodeGen/X86/segmented-stacks-dynamic.ll | 4 +-
 llvm/test/CodeGen/X86/select.ll | 10 ++--
 llvm/test/CodeGen/X86/shift-bmi2.ll | 21 ++++----
 llvm/test/CodeGen/X86/sink-hoist.ll | 9 ++--
 llvm/test/CodeGen/X86/sse2.ll | 25 +++++----
 llvm/test/CodeGen/X86/store-narrow.ll | 22 ++++----
 llvm/test/CodeGen/X86/tailcall-largecode.ll | 2 +-
 llvm/test/CodeGen/X86/test-nofold.ll | 9 ++--
 llvm/test/CodeGen/X86/trunc-to-bool.ll | 2 +-
 llvm/test/CodeGen/X86/v-binop-widen.ll | 5 +-
 llvm/test/CodeGen/X86/v-binop-widen2.ll | 4 +-
 llvm/test/CodeGen/X86/vec_shuffle-27.ll | 4 +-
 llvm/test/CodeGen/X86/vec_shuffle-39.ll | 6 +--
 llvm/test/CodeGen/X86/widen_cast-1.ll | 2 +-
 .../CodeGen/X86/win64_alloca_dynalloca.ll | 12 ++---
 llvm/test/CodeGen/X86/x86-64-psub.ll | 25 +++++----
 llvm/test/CodeGen/X86/x86-shifts.ll | 4 +-
 llvm/test/CodeGen/X86/zext-fold.ll | 6 +--
 llvm/test/CodeGen/X86/zext-sext.ll | 6 +--
 .../DebugInfo/X86/dbg-value-dag-combine.ll | 2 +-
 67 files changed, 346 insertions(+), 285 deletions(-)

diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 21c2d573dc7f..fb357c467565 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -383,11 +383,14 @@ public:
   /// memset with zero passed as the second argument. Otherwise it
   /// returns null.
   const char *getBZeroEntry() const;
-
+
   /// This function returns true if the target has sincos() routine in its
   /// compiler runtime or math libraries.
   bool hasSinCos() const;
 
+  /// Enable the MachineScheduler pass for all X86 subtargets.
+  bool enableMachineScheduler() const LLVM_OVERRIDE { return true; }
+
   /// enablePostRAScheduler - run for Atom optimization.
   bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
                              TargetSubtargetInfo::AntiDepBreakMode& Mode,
diff --git a/llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll b/llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll
index 0afddd8f876f..69266dc4e44b 100644
--- a/llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll
+++ b/llvm/test/CodeGen/X86/2006-05-02-InstrSched1.ll
@@ -1,7 +1,10 @@
 ; REQUIRES: asserts
 ; RUN: llc < %s -march=x86 -relocation-model=static -stats 2>&1 | \
-; RUN: grep asm-printer | grep 14
+; RUN: grep asm-printer | grep 16
 ;
+; It's possible to schedule this in 14 instructions by avoiding
+; callee-save registers, but the scheduler isn't currently that
+; conservative with registers.
@size20 = external global i32 ; [#uses=1] @in5 = external global i8* ; [#uses=1] @@ -21,4 +24,3 @@ define i32 @compare(i8* %a, i8* %b) nounwind { } declare i32 @memcmp(i8*, i8*, i32) - diff --git a/llvm/test/CodeGen/X86/2007-01-08-InstrSched.ll b/llvm/test/CodeGen/X86/2007-01-08-InstrSched.ll index 24aa5b98d0bb..4ec703921e29 100644 --- a/llvm/test/CodeGen/X86/2007-01-08-InstrSched.ll +++ b/llvm/test/CodeGen/X86/2007-01-08-InstrSched.ll @@ -13,10 +13,10 @@ define float @foo(float %x) nounwind { ; CHECK: mulss ; CHECK: mulss -; CHECK: addss +; CHECK: mulss ; CHECK: mulss ; CHECK: addss -; CHECK: mulss +; CHECK: addss ; CHECK: addss ; CHECK: ret } diff --git a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll index 43c239725058..764c2cdd6d99 100644 --- a/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll +++ b/llvm/test/CodeGen/X86/2009-02-26-MachineLICMBug.ll @@ -17,9 +17,9 @@ bb4: ; preds = %bb.i, %bb26, %bb4, %entry ; CHECK: %bb4 ; CHECK: xorl ; CHECK: callq +; CHECK: xorl +; CHECK: xorl ; CHECK: movq -; CHECK: xorl -; CHECK: xorl %0 = call i32 (...)* @xxGetOffsetForCode(i32 undef) nounwind ; [#uses=0] %ins = or i64 %p, 2097152 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll b/llvm/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll index d4a74c9e7e7a..060c535dd778 100644 --- a/llvm/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll +++ b/llvm/test/CodeGen/X86/2010-02-19-TailCallRetAddrBug.ll @@ -1,9 +1,9 @@ -; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt < %s | FileCheck %s +; RUN: llc -mcpu=generic -mtriple=i386-apple-darwin -tailcallopt -enable-misched=false < %s | FileCheck %s ; Check that lowered argumens do not overwrite the return address before it is moved. ; Bug 6225 ; ; If a call is a fastcc tail call and tail call optimization is enabled, the -; caller frame is replaced by the callee frame. This can require that arguments are +; caller frame is replaced by the callee frame. This can require that arguments are ; placed on the former return address stack slot. Special care needs to be taken ; taken that the return address is moved / or stored in a register before ; lowering of arguments potentially overwrites the value. 
@@ -51,5 +51,3 @@ false: tail call fastcc void @l298(i32 %r10, i32 %r9, i32 %r4) noreturn nounwind ret void } - - diff --git a/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll index 1b339777f571..39d89e3d8276 100644 --- a/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll +++ b/llvm/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll @@ -19,8 +19,8 @@ entry: } ; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip) -; CHECK: movb 38(%rsp), [[R0:%.+]] -; CHECK: movb 8(%rsp), [[R1:%.+]] -; CHECK: movb [[R1]], 8(%rsp) -; CHECK: movb [[R0]], 38(%rsp) +; CHECK: movb (%rsp), [[R1:%.+]] +; CHECK: movb 30(%rsp), [[R0:%.+]] +; CHECK: movb [[R1]], (%rsp) +; CHECK: movb [[R0]], 30(%rsp) ; CHECK: callq ___stack_chk_fail diff --git a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll index da734d4b6454..07a6910c65e0 100644 --- a/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll +++ b/llvm/test/CodeGen/X86/2011-10-19-LegelizeLoad.ll @@ -16,8 +16,8 @@ target triple = "x86_64-unknown-linux-gnu" ; CHECK: main define i32 @main() nounwind uwtable { entry: -; CHECK: pmovsxbq j(%rip), % ; CHECK: pmovsxbq i(%rip), % +; CHECK: pmovsxbq j(%rip), % %0 = load <2 x i8>* @i, align 8 %1 = load <2 x i8>* @j, align 8 %div = sdiv <2 x i8> %1, %0 @@ -25,4 +25,3 @@ entry: ret i32 0 ; CHECK: ret } - diff --git a/llvm/test/CodeGen/X86/2012-04-26-sdglue.ll b/llvm/test/CodeGen/X86/2012-04-26-sdglue.ll index 186fafb9edb2..16706ae957f2 100644 --- a/llvm/test/CodeGen/X86/2012-04-26-sdglue.ll +++ b/llvm/test/CodeGen/X86/2012-04-26-sdglue.ll @@ -5,8 +5,8 @@ ; It's hard to test for the ISEL condition because CodeGen optimizes ; away the bugpointed code. Just ensure the basics are still there. 
;CHECK-LABEL: func: -;CHECK: vxorps -;CHECK: vinsertf128 +;CHECK: vpxor +;CHECK: vinserti128 ;CHECK: vpshufd ;CHECK: vpshufd ;CHECK: vmulps diff --git a/llvm/test/CodeGen/X86/3addr-16bit.ll b/llvm/test/CodeGen/X86/3addr-16bit.ll index 77c3c1616a40..fafdfdb74811 100644 --- a/llvm/test/CodeGen/X86/3addr-16bit.ll +++ b/llvm/test/CodeGen/X86/3addr-16bit.ll @@ -34,7 +34,8 @@ entry: ; 64BIT-LABEL: t2: ; 64BIT-NOT: movw %si, %ax -; 64BIT: leal -1(%rsi), %eax +; 64BIT: decl %eax +; 64BIT: movzwl %ax %0 = icmp eq i16 %k, %c ; [#uses=1] %1 = add i16 %k, -1 ; [#uses=3] br i1 %0, label %bb, label %bb1 @@ -58,7 +59,7 @@ entry: ; 64BIT-LABEL: t3: ; 64BIT-NOT: movw %si, %ax -; 64BIT: leal 2(%rsi), %eax +; 64BIT: addl $2, %eax %0 = add i16 %k, 2 ; [#uses=3] %1 = icmp eq i16 %k, %c ; [#uses=1] br i1 %1, label %bb, label %bb1 @@ -81,7 +82,7 @@ entry: ; 64BIT-LABEL: t4: ; 64BIT-NOT: movw %si, %ax -; 64BIT: leal (%rsi,%rdi), %eax +; 64BIT: addl %edi, %eax %0 = add i16 %k, %c ; [#uses=3] %1 = icmp eq i16 %k, %c ; [#uses=1] br i1 %1, label %bb, label %bb1 diff --git a/llvm/test/CodeGen/X86/StackColoring.ll b/llvm/test/CodeGen/X86/StackColoring.ll index f1d92965c496..a8e3537fabe3 100644 --- a/llvm/test/CodeGen/X86/StackColoring.ll +++ b/llvm/test/CodeGen/X86/StackColoring.ll @@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.8.0" -;YESCOLOR: subq $136, %rsp -;NOCOLOR: subq $264, %rsp +;YESCOLOR: subq $144, %rsp +;NOCOLOR: subq $272, %rsp define i32 @myCall_w2(i32 %in) { entry: diff --git a/llvm/test/CodeGen/X86/abi-isel.ll b/llvm/test/CodeGen/X86/abi-isel.ll index 3b84231ecd54..633e70f0285a 100644 --- a/llvm/test/CodeGen/X86/abi-isel.ll +++ b/llvm/test/CodeGen/X86/abi-isel.ll @@ -1,16 +1,16 @@ -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-32-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-unknown-linux-gnu -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-32-PIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=LINUX-64-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=LINUX-64-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-unknown-linux-gnu -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=LINUX-64-PIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-32-STATIC 
-; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-32-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-DYNAMIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=i686-apple-darwin -march=x86 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-32-PIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small | FileCheck %s -check-prefix=DARWIN-64-STATIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC -; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small | FileCheck %s -check-prefix=DARWIN-64-PIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=static -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-STATIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=dynamic-no-pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-DYNAMIC +; RUN: llc < %s -asm-verbose=0 -mcpu=generic -mtriple=x86_64-apple-darwin -march=x86-64 -relocation-model=pic -code-model=small -pre-RA-sched=list-ilp | FileCheck %s -check-prefix=DARWIN-64-PIC @src = external global [131072 x i32] @dst = external global [131072 x i32] diff --git a/llvm/test/CodeGen/X86/add.ll b/llvm/test/CodeGen/X86/add.ll index f36577b26a1a..62a62a460bd7 100644 --- a/llvm/test/CodeGen/X86/add.ll +++ b/llvm/test/CodeGen/X86/add.ll @@ -9,7 +9,7 @@ define i32 @test1(i32 inreg %a) nounwind { %b = add i32 %a, 128 ret i32 %b ; X32: subl $-128, %eax -; X64: subl $-128, +; X64: subl $-128, } define i64 @test2(i64 inreg %a) nounwind { %b = add i64 %a, 2147483648 @@ -20,7 +20,7 @@ define i64 @test2(i64 inreg %a) nounwind { define i64 @test3(i64 inreg %a) nounwind { %b = add i64 %a, 128 ret i64 %b - + ; X32: addl $128, %eax ; X64: subq $-128, } @@ -38,7 +38,7 @@ normal: overflow: ret i1 false - + ; X32-LABEL: test4: ; X32: addl ; X32-NEXT: jo @@ -82,11 +82,11 @@ define i64 @test6(i64 %A, i32 %B) nounwind { ret i64 %tmp5 ; X32-LABEL: test6: -; X32: movl 12(%esp), %edx +; X32: movl 4(%esp), %eax +; X32-NEXT: movl 12(%esp), %edx ; X32-NEXT: addl 8(%esp), %edx -; X32-NEXT: movl 4(%esp), %eax ; X32-NEXT: ret - + ; X64-LABEL: test6: ; X64: shlq $32, %r[[A1]] ; X64: leaq (%r[[A1]],%r[[A0]]), %rax diff --git a/llvm/test/CodeGen/X86/alloca-align-rounding.ll b/llvm/test/CodeGen/X86/alloca-align-rounding.ll index 3d76fb0aa25b..74b9470db752 100644 --- a/llvm/test/CodeGen/X86/alloca-align-rounding.ll +++ b/llvm/test/CodeGen/X86/alloca-align-rounding.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s +; 
RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux -enable-misched=false | FileCheck %s declare void @bar(<2 x i64>* %n) diff --git a/llvm/test/CodeGen/X86/avx-arith.ll b/llvm/test/CodeGen/X86/avx-arith.ll index 4aa337033df6..a9da1ec067ca 100644 --- a/llvm/test/CodeGen/X86/avx-arith.ll +++ b/llvm/test/CodeGen/X86/avx-arith.ll @@ -240,14 +240,14 @@ define <16 x i16> @vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone { ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm ; CHECK-NEXT: vpaddq %xmm -; CHECK-NEXT: vpmuludq %xmm -; CHECK-NEXT: vpsrlq $32, %xmm -; CHECK-NEXT: vpmuludq %xmm -; CHECK-NEXT: vpsllq $32, %xmm ; CHECK-NEXT: vpsrlq $32, %xmm ; CHECK-NEXT: vpmuludq %xmm ; CHECK-NEXT: vpsllq $32, %xmm ; CHECK-NEXT: vpaddq %xmm +; CHECK-NEXT: vpmuludq %xmm +; CHECK-NEXT: vpsrlq $32, %xmm +; CHECK-NEXT: vpmuludq %xmm +; CHECK-NEXT: vpsllq $32, %xmm ; CHECK-NEXT: vpaddq %xmm ; CHECK-NEXT: vpsrlq $32, %xmm ; CHECK-NEXT: vpmuludq %xmm @@ -269,4 +269,3 @@ define <4 x float> @int_sqrt_ss() { %x2 = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %x1) nounwind ret <4 x float> %x2 } - diff --git a/llvm/test/CodeGen/X86/avx-intel-ocl.ll b/llvm/test/CodeGen/X86/avx-intel-ocl.ll index 055072098a25..7337815a39ac 100644 --- a/llvm/test/CodeGen/X86/avx-intel-ocl.ll +++ b/llvm/test/CodeGen/X86/avx-intel-ocl.ll @@ -32,7 +32,7 @@ declare i32 @func_int(i32, i32) define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { %y = alloca <16 x float>, align 16 %x = fadd <16 x float> %a, %b - %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) + %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) %2 = load <16 x float>* %y, align 16 %3 = fadd <16 x float> %2, %1 ret <16 x float> %3 @@ -43,21 +43,21 @@ define <16 x float> @testf16_inp(<16 x float> %a, <16 x float> %b) nounwind { ; preserved ymm6-ymm15 ; WIN64: testf16_regs ; WIN64: call -; WIN64: vaddps {{%ymm[6-7]}}, %ymm0, %ymm0 -; WIN64: vaddps {{%ymm[6-7]}}, %ymm1, %ymm1 +; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} +; WIN64: vaddps {{%ymm[6-7]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} ; WIN64: ret ; preserved ymm8-ymm15 ; X64: testf16_regs ; X64: call -; X64: vaddps {{%ymm[8-9]}}, %ymm0, %ymm0 -; X64: vaddps {{%ymm[8-9]}}, %ymm1, %ymm1 +; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} +; X64: vaddps {{%ymm[8-9]}}, {{%ymm[0-1]}}, {{%ymm[0-1]}} ; X64: ret define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind { %y = alloca <16 x float>, align 16 %x = fadd <16 x float> %a, %b - %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) + %1 = call intel_ocl_bicc <16 x float> @func_float16_ptr(<16 x float> %x, <16 x float>* %y) %2 = load <16 x float>* %y, align 16 %3 = fadd <16 x float> %1, %b %4 = fadd <16 x float> %2, %3 @@ -166,4 +166,3 @@ entry: %8 = shufflevector <8 x float> %3, <8 x float> %7, <8 x i32> ret <8 x float> %8 } - diff --git a/llvm/test/CodeGen/X86/avx-shuffle.ll b/llvm/test/CodeGen/X86/avx-shuffle.ll index b0e64d102751..0956361c7e30 100644 --- a/llvm/test/CodeGen/X86/avx-shuffle.ll +++ b/llvm/test/CodeGen/X86/avx-shuffle.ll @@ -81,7 +81,7 @@ entry: define i32 @test9(<4 x i32> %a) nounwind { ; CHECK: test9 ; CHECK: vpextrd - %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> + %b = shufflevector <4 x i32> %a, <4 x i32> undef, <8 x i32> %r = extractelement <8 x i32> %b, i32 2 ; CHECK: ret ret i32 %r @@ -251,6 +251,8 @@ define <8 x float> @test19(<8 x float> %A, 
<8 x float>%B) nounwind { ; CHECK: swap8doubles ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} ; CHECK: vmovups {{[0-9]*}}(%rdi), %xmm{{[0-9]+}} +; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}} +; CHECK: vinsertf128 $1, {{[0-9]*}}(%rdi), %ymm{{[0-9]+}} ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}} ; CHECK: vmovaps {{[0-9]*}}(%rsi), %ymm{{[0-9]+}} ; CHECK: vmovaps %xmm{{[0-9]+}}, {{[0-9]*}}(%rdi) diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll index bbad5076407a..ed68ff7bcbdb 100644 --- a/llvm/test/CodeGen/X86/avx512-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512-cvt.ll @@ -167,8 +167,8 @@ define i32 @float_to_int(float %x) { } ; CHECK-LABEL: uitof64 -; CHECK: vextracti64x4 ; CHECK: vcvtudq2pd +; CHECK: vextracti64x4 ; CHECK: vcvtudq2pd ; CHECK: ret define <16 x double> @uitof64(<16 x i32> %a) nounwind { diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index eec8873b2a44..ef5cb56d7284 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -27,8 +27,8 @@ define i16 @mand16(i16 %x, i16 %y) { %md = xor <16 x i1> %ma, %mb %me = or <16 x i1> %mc, %md %ret = bitcast <16 x i1> %me to i16 -; CHECK: kxorw ; CHECK: kandw +; CHECK: kxorw ; CHECK: korw ret i16 %ret } @@ -55,4 +55,3 @@ define i8 @shuf_test1(i16 %v) nounwind { %mask1 = bitcast <8 x i1> %mask to i8 ret i8 %mask1 } - diff --git a/llvm/test/CodeGen/X86/break-anti-dependencies.ll b/llvm/test/CodeGen/X86/break-anti-dependencies.ll index c94261467c9d..614d0adc7271 100644 --- a/llvm/test/CodeGen/X86/break-anti-dependencies.ll +++ b/llvm/test/CodeGen/X86/break-anti-dependencies.ll @@ -1,7 +1,7 @@ ; Without list-burr scheduling we may not see the difference in codegen here. ; Use a subtarget that has post-RA scheduling enabled because the anti-dependency ; breaker requires liveness information to be kept. 
-; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t +; RUN: llc < %s -march=x86-64 -mcpu=atom -enable-misched=false -post-RA-scheduler -pre-RA-sched=list-burr -break-anti-dependencies=none > %t ; RUN: grep "%xmm0" %t | count 14 ; RUN: not grep "%xmm1" %t ; RUN: llc < %s -march=x86-64 -mcpu=atom -post-RA-scheduler -break-anti-dependencies=critical > %t diff --git a/llvm/test/CodeGen/X86/bt.ll b/llvm/test/CodeGen/X86/bt.ll index e28923bb21d2..f12a3543b072 100644 --- a/llvm/test/CodeGen/X86/bt.ll +++ b/llvm/test/CodeGen/X86/bt.ll @@ -38,7 +38,7 @@ UnifiedReturnBlock: ; preds = %entry define void @test2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: test2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -56,7 +56,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atest2(i32 %x, i32 %n) nounwind { entry: ; CHECK: atest2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -74,7 +74,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atest2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: atest2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 %tmp4 = icmp eq i32 %tmp3, 0 ; [#uses=1] @@ -91,7 +91,7 @@ UnifiedReturnBlock: ; preds = %entry define void @test3(i32 %x, i32 %n) nounwind { entry: ; CHECK: test3 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %tmp29, %x ; [#uses=1] @@ -109,7 +109,7 @@ UnifiedReturnBlock: ; preds = %entry define void @test3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: test3b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -127,7 +127,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -145,7 +145,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -163,7 +163,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atestne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: atestne2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -181,7 +181,7 @@ UnifiedReturnBlock: ; preds = %entry define void @atestne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: atestne2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -199,7 +199,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne3(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne3 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %tmp29, %x ; [#uses=1] @@ -217,7 +217,7 @@ UnifiedReturnBlock: ; preds = %entry define void @testne3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: testne3b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 
%x, %tmp29 @@ -235,7 +235,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query2(i32 %x, i32 %n) nounwind { entry: ; CHECK: query2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -253,7 +253,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: query2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -271,7 +271,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aquery2(i32 %x, i32 %n) nounwind { entry: ; CHECK: aquery2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -289,7 +289,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aquery2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: aquery2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -307,7 +307,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %tmp29, %x ; [#uses=1] @@ -325,7 +325,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -343,7 +343,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3x(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3x -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %tmp29, %x ; [#uses=1] @@ -361,7 +361,7 @@ UnifiedReturnBlock: ; preds = %entry define void @query3bx(i32 %x, i32 %n) nounwind { entry: ; CHECK: query3bx -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jae %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -379,7 +379,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -397,7 +397,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = lshr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -415,7 +415,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aqueryne2(i32 %x, i32 %n) nounwind { entry: ; CHECK: aqueryne2 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 %tmp29, 1 ; [#uses=1] @@ -433,7 +433,7 @@ UnifiedReturnBlock: ; preds = %entry define void @aqueryne2b(i32 %x, i32 %n) nounwind { entry: ; CHECK: aqueryne2b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = ashr i32 %x, %n ; [#uses=1] %tmp3 = and i32 1, %tmp29 @@ -451,7 +451,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3 -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %tmp29, %x ; [#uses=1] @@ -469,7 +469,7 @@ 
UnifiedReturnBlock: ; preds = %entry define void @queryne3b(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3b -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %x, %tmp29 @@ -487,7 +487,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3x(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3x -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %tmp29, %x ; [#uses=1] @@ -505,7 +505,7 @@ UnifiedReturnBlock: ; preds = %entry define void @queryne3bx(i32 %x, i32 %n) nounwind { entry: ; CHECK: queryne3bx -; CHECK: btl %eax, %ecx +; CHECK: btl %e{{..}}, %e{{..}} ; CHECK: jb %tmp29 = shl i32 1, %n ; [#uses=1] %tmp3 = and i32 %x, %tmp29 diff --git a/llvm/test/CodeGen/X86/byval7.ll b/llvm/test/CodeGen/X86/byval7.ll index 8a96e41c9cdb..42751d7dbc93 100644 --- a/llvm/test/CodeGen/X86/byval7.ll +++ b/llvm/test/CodeGen/X86/byval7.ll @@ -7,14 +7,14 @@ define i32 @main() nounwind { entry: ; CHECK-LABEL: main: -; CHECK: movl $1, (%esp) ; CHECK: leal 16(%esp), %edi ; CHECK: leal 160(%esp), %esi ; CHECK: rep;movsl +; CHECK: movl $1, (%esp) %s = alloca %struct.S ; <%struct.S*> [#uses=2] %tmp15 = getelementptr %struct.S* %s, i32 0, i32 0 ; <<2 x i64>*> [#uses=1] store <2 x i64> < i64 8589934595, i64 1 >, <2 x i64>* %tmp15, align 16 - call void @t( i32 1, %struct.S* byval %s ) nounwind + call void @t( i32 1, %struct.S* byval %s ) nounwind ret i32 0 } diff --git a/llvm/test/CodeGen/X86/chain_order.ll b/llvm/test/CodeGen/X86/chain_order.ll index 8c1c86420f6b..c88726e75a81 100644 --- a/llvm/test/CodeGen/X86/chain_order.ll +++ b/llvm/test/CodeGen/X86/chain_order.ll @@ -3,8 +3,8 @@ ;CHECK-LABEL: cftx020: ;CHECK: vmovsd (%rdi), %xmm{{.*}} ;CHECK: vmovsd 16(%rdi), %xmm{{.*}} -;CHECK: vmovhpd 8(%rdi), %xmm{{.*}} ;CHECK: vmovsd 24(%rdi), %xmm{{.*}} +;CHECK: vmovhpd 8(%rdi), %xmm{{.*}} ;CHECK: vmovupd %xmm{{.*}}, (%rdi) ;CHECK: vmovupd %xmm{{.*}}, 16(%rdi) ;CHECK: ret @@ -35,4 +35,3 @@ entry: store <2 x double> %14, <2 x double>* %15, align 8 ret void } - diff --git a/llvm/test/CodeGen/X86/cmov.ll b/llvm/test/CodeGen/X86/cmov.ll index 92c0445d1723..215b86267a47 100644 --- a/llvm/test/CodeGen/X86/cmov.ll +++ b/llvm/test/CodeGen/X86/cmov.ll @@ -4,8 +4,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 define i32 @test1(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { entry: ; CHECK-LABEL: test1: -; CHECK: movl $12, %eax -; CHECK-NEXT: btl +; CHECK: btl +; CHECK-NEXT: movl $12, %eax ; CHECK-NEXT: cmovael (%rcx), %eax ; CHECK-NEXT: ret @@ -19,8 +19,8 @@ entry: define i32 @test2(i32 %x, i32 %n, i32 %w, i32* %vp) nounwind readnone { entry: ; CHECK-LABEL: test2: -; CHECK: movl $12, %eax -; CHECK-NEXT: btl +; CHECK: btl +; CHECK-NEXT: movl $12, %eax ; CHECK-NEXT: cmovbl (%rcx), %eax ; CHECK-NEXT: ret @@ -92,7 +92,7 @@ bb.i.i.i: ; preds = %entry ; CHECK: testb ; CHECK-NOT: xor ; CHECK: setne -; CHECK-NEXT: testb +; CHECK: testb func_4.exit.i: ; preds = %bb.i.i.i, %entry %.not.i = xor i1 %2, true ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/commute-two-addr.ll b/llvm/test/CodeGen/X86/commute-two-addr.ll index eb44e0883489..656c385e2bc7 100644 --- a/llvm/test/CodeGen/X86/commute-two-addr.ll +++ b/llvm/test/CodeGen/X86/commute-two-addr.ll @@ -38,10 +38,10 @@ define i32 @t2(i32 %X, i32 %Y) nounwind { define %0 @t3(i32 %lb, i8 zeroext %has_lb, i8 zeroext %lb_inclusive, i32 %ub, i8 zeroext %has_ub, i8 zeroext %ub_inclusive) nounwind { entry: ; 
DARWIN-LABEL: t3: -; DARWIN: shll $16 ; DARWIN: shlq $32, %rcx +; DARWIN-NEXT: orq %rcx, %rax +; DARWIN-NEXT: shll $8 ; DARWIN-NOT: leaq -; DARWIN: orq %rcx, %rax %tmp21 = zext i32 %lb to i64 %tmp23 = zext i32 %ub to i64 %tmp24 = shl i64 %tmp23, 32 diff --git a/llvm/test/CodeGen/X86/fast-isel-mem.ll b/llvm/test/CodeGen/X86/fast-isel-mem.ll index 7fcef0322cb9..cd2dc1d02c8a 100644 --- a/llvm/test/CodeGen/X86/fast-isel-mem.ll +++ b/llvm/test/CodeGen/X86/fast-isel-mem.ll @@ -40,7 +40,7 @@ entry: ; CHECK: movl L_LotsStuff$non_lazy_ptr, %ecx ; ATOM: _t: -; ATOM: movl L_LotsStuff$non_lazy_ptr, %ecx -; ATOM: movl $0, %eax +; ATOM: movl L_LotsStuff$non_lazy_ptr, %e{{..}} +; ATOM: movl $0, %e{{..}} } diff --git a/llvm/test/CodeGen/X86/fastcc.ll b/llvm/test/CodeGen/X86/fastcc.ll index 705ab7bada7c..a362f8d1ca7e 100644 --- a/llvm/test/CodeGen/X86/fastcc.ll +++ b/llvm/test/CodeGen/X86/fastcc.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+sse2 -post-RA-scheduler=false | FileCheck %s -; CHECK: movsd %xmm0, 8(%esp) -; CHECK: xorl %ecx, %ecx +; CHECK: movsd %xmm{{[0-9]}}, 8(%esp) +; CHECK: xorl %eax, %eax @d = external global double ; [#uses=1] @c = external global double ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/fold-load.ll b/llvm/test/CodeGen/X86/fold-load.ll index a1fc7dbd7b19..dde0a2d1c5d3 100644 --- a/llvm/test/CodeGen/X86/fold-load.ll +++ b/llvm/test/CodeGen/X86/fold-load.ll @@ -38,10 +38,10 @@ L: store i16 %A, i16* %Q ret i32 %D - + ; CHECK-LABEL: test2: ; CHECK: movl 4(%esp), %eax -; CHECK-NEXT: movzwl (%eax), %ecx +; CHECK-NEXT: movzwl (%eax), %e{{..}} } diff --git a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll index 0a3afb7b025b..60a6844b39b2 100644 --- a/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll +++ b/llvm/test/CodeGen/X86/fold-pcmpeqd-2.ll @@ -54,22 +54,27 @@ forbody: ; preds = %forcond %mul310 = fmul <4 x float> %bitcast204.i104, zeroinitializer ; <<4 x float>> [#uses=2] %mul313 = fmul <4 x float> %bitcast204.i, zeroinitializer ; <<4 x float>> [#uses=1] %cmpunord.i11 = call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> zeroinitializer, <4 x float> zeroinitializer, i8 3) nounwind ; <<4 x float>> [#uses=1] + %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1] + %andps.i5 = and <4 x i32> %bitcast.i3, zeroinitializer ; <<4 x i32>> [#uses=1] + + call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind + + %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast6.i13 = bitcast <4 x float> %cmpunord.i11 to <4 x i32> ; <<4 x i32>> [#uses=2] %andps.i14 = add <4 x i32> , %bitcast6.i13 ; <<4 x i32>> [#uses=1] %not.i16 = xor <4 x i32> %bitcast6.i13, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %andnps.i17 = add <4 x i32> , %not.i16 ; <<4 x i32>> [#uses=1] %orps.i18 = or <4 x i32> %andnps.i17, %andps.i14 ; <<4 x i32>> [#uses=1] %bitcast17.i19 = bitcast <4 x i32> %orps.i18 to <4 x float> ; <<4 x float>> [#uses=1] - %tmp83 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul310, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] - %bitcast.i3 = bitcast <4 x float> %mul310 to <4 x i32> ; <<4 x i32>> [#uses=1] - %andps.i5 = and <4 x i32> %bitcast.i3, 
zeroinitializer ; <<4 x i32>> [#uses=1] + %bitcast11.i6 = bitcast <4 x float> %tmp83 to <4 x i32> ; <<4 x i32>> [#uses=1] %not.i7 = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1] %andnps.i8 = and <4 x i32> %bitcast11.i6, %not.i7 ; <<4 x i32>> [#uses=1] - call void null(<4 x float> %mul313, <4 x float> %cmpunord.i11, <4 x float> %tmp83, <4 x float> zeroinitializer, %struct.__ImageExecInfo* null, <4 x i32> zeroinitializer) nounwind %orps.i9 = or <4 x i32> %andnps.i8, %andps.i5 ; <<4 x i32>> [#uses=1] %bitcast17.i10 = bitcast <4 x i32> %orps.i9 to <4 x float> ; <<4 x float>> [#uses=1] - %tmp84 = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %mul313, <4 x float> zeroinitializer) nounwind ; <<4 x float>> [#uses=1] + %bitcast6.i = bitcast <4 x float> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=2] %andps.i = and <4 x i32> zeroinitializer, %bitcast6.i ; <<4 x i32>> [#uses=1] %bitcast11.i = bitcast <4 x float> %tmp84 to <4 x i32> ; <<4 x i32>> [#uses=1] diff --git a/llvm/test/CodeGen/X86/full-lsr.ll b/llvm/test/CodeGen/X86/full-lsr.ll index 0729dda4a12b..cbcc62a7011a 100644 --- a/llvm/test/CodeGen/X86/full-lsr.ll +++ b/llvm/test/CodeGen/X86/full-lsr.ll @@ -4,7 +4,7 @@ define void @foo(float* nocapture %A, float* nocapture %B, float* nocapture %C, i32 %N) nounwind { ; ATOM: foo ; ATOM: addl -; ATOM: leal +; ATOM: addl ; ATOM: leal ; CHECK: foo diff --git a/llvm/test/CodeGen/X86/gather-addresses.ll b/llvm/test/CodeGen/X86/gather-addresses.ll index 72a50961b2ff..5f48b1e32b16 100644 --- a/llvm/test/CodeGen/X86/gather-addresses.ll +++ b/llvm/test/CodeGen/X86/gather-addresses.ll @@ -1,21 +1,35 @@ -; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s -; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s +; RUN: llc -mtriple=x86_64-linux -mcpu=nehalem < %s | FileCheck %s --check-prefix=LIN +; RUN: llc -mtriple=x86_64-win32 -mcpu=nehalem < %s | FileCheck %s --check-prefix=WIN ; rdar://7398554 ; When doing vector gather-scatter index calculation with 32-bit indices, ; bounce the vector off of cache rather than shuffling each individual ; element out of the index vector. 
-; CHECK: andps ([[H:%rdx|%r8]]), %xmm0 -; CHECK: movaps %xmm0, {{(-24)?}}(%rsp) -; CHECK: movslq {{(-24)?}}(%rsp), %rax -; CHECK: movsd ([[P:%rdi|%rcx]],%rax,8), %xmm0 -; CHECK: movslq {{-20|4}}(%rsp), %rax -; CHECK: movhpd ([[P]],%rax,8), %xmm0 -; CHECK: movslq {{-16|8}}(%rsp), %rax -; CHECK: movsd ([[P]],%rax,8), %xmm1 -; CHECK: movslq {{-12|12}}(%rsp), %rax -; CHECK: movhpd ([[P]],%rax,8), %xmm1 +; CHECK: foo: +; LIN: movaps (%rsi), %xmm0 +; LIN: andps (%rdx), %xmm0 +; LIN: movaps %xmm0, -24(%rsp) +; LIN: movslq -24(%rsp), %[[REG1:r.+]] +; LIN: movslq -20(%rsp), %[[REG2:r.+]] +; LIN: movslq -16(%rsp), %[[REG3:r.+]] +; LIN: movslq -12(%rsp), %[[REG4:r.+]] +; LIN: movsd (%rdi,%[[REG1]],8), %xmm0 +; LIN: movhpd (%rdi,%[[REG2]],8), %xmm0 +; LIN: movsd (%rdi,%[[REG3]],8), %xmm1 +; LIN: movhpd (%rdi,%[[REG4]],8), %xmm1 + +; WIN: movaps (%rdx), %xmm0 +; WIN: andps (%r8), %xmm0 +; WIN: movaps %xmm0, (%rsp) +; WIN: movslq (%rsp), %[[REG1:r.+]] +; WIN: movslq 4(%rsp), %[[REG2:r.+]] +; WIN: movslq 8(%rsp), %[[REG3:r.+]] +; WIN: movslq 12(%rsp), %[[REG4:r.+]] +; WIN: movsd (%rcx,%[[REG1]],8), %xmm0 +; WIN: movhpd (%rcx,%[[REG2]],8), %xmm0 +; WIN: movsd (%rcx,%[[REG3]],8), %xmm1 +; WIN: movhpd (%rcx,%[[REG4]],8), %xmm1 define <4 x double> @foo(double* %p, <4 x i32>* %i, <4 x i32>* %h) nounwind { %a = load <4 x i32>* %i diff --git a/llvm/test/CodeGen/X86/ghc-cc.ll b/llvm/test/CodeGen/X86/ghc-cc.ll index 0e65cfdbae30..4dba2c086329 100644 --- a/llvm/test/CodeGen/X86/ghc-cc.ll +++ b/llvm/test/CodeGen/X86/ghc-cc.ll @@ -28,10 +28,10 @@ entry: define cc 10 void @foo() nounwind { entry: - ; CHECK: movl base, %ebx - ; CHECK-NEXT: movl sp, %ebp + ; CHECK: movl r1, %esi ; CHECK-NEXT: movl hp, %edi - ; CHECK-NEXT: movl r1, %esi + ; CHECK-NEXT: movl sp, %ebp + ; CHECK-NEXT: movl base, %ebx %0 = load i32* @r1 %1 = load i32* @hp %2 = load i32* @sp @@ -42,4 +42,3 @@ entry: } declare cc 10 void @bar(i32, i32, i32, i32) - diff --git a/llvm/test/CodeGen/X86/ghc-cc64.ll b/llvm/test/CodeGen/X86/ghc-cc64.ll index fcf7e1797ad8..403391e81658 100644 --- a/llvm/test/CodeGen/X86/ghc-cc64.ll +++ b/llvm/test/CodeGen/X86/ghc-cc64.ll @@ -41,22 +41,22 @@ entry: define cc 10 void @foo() nounwind { entry: - ; CHECK: movq base(%rip), %r13 - ; CHECK-NEXT: movq sp(%rip), %rbp - ; CHECK-NEXT: movq hp(%rip), %r12 - ; CHECK-NEXT: movq r1(%rip), %rbx - ; CHECK-NEXT: movq r2(%rip), %r14 - ; CHECK-NEXT: movq r3(%rip), %rsi - ; CHECK-NEXT: movq r4(%rip), %rdi - ; CHECK-NEXT: movq r5(%rip), %r8 - ; CHECK-NEXT: movq r6(%rip), %r9 - ; CHECK-NEXT: movq splim(%rip), %r15 - ; CHECK-NEXT: movss f1(%rip), %xmm1 - ; CHECK-NEXT: movss f2(%rip), %xmm2 - ; CHECK-NEXT: movss f3(%rip), %xmm3 - ; CHECK-NEXT: movss f4(%rip), %xmm4 + ; CHECK: movsd d2(%rip), %xmm6 ; CHECK-NEXT: movsd d1(%rip), %xmm5 - ; CHECK-NEXT: movsd d2(%rip), %xmm6 + ; CHECK-NEXT: movss f4(%rip), %xmm4 + ; CHECK-NEXT: movss f3(%rip), %xmm3 + ; CHECK-NEXT: movss f2(%rip), %xmm2 + ; CHECK-NEXT: movss f1(%rip), %xmm1 + ; CHECK-NEXT: movq splim(%rip), %r15 + ; CHECK-NEXT: movq r6(%rip), %r9 + ; CHECK-NEXT: movq r5(%rip), %r8 + ; CHECK-NEXT: movq r4(%rip), %rdi + ; CHECK-NEXT: movq r3(%rip), %rsi + ; CHECK-NEXT: movq r2(%rip), %r14 + ; CHECK-NEXT: movq r1(%rip), %rbx + ; CHECK-NEXT: movq hp(%rip), %r12 + ; CHECK-NEXT: movq sp(%rip), %rbp + ; CHECK-NEXT: movq base(%rip), %r13 %0 = load double* @d2 %1 = load double* @d1 %2 = load float* @f4 @@ -83,4 +83,3 @@ entry: declare cc 10 void @bar(i64, i64, i64, i64, i64, i64, i64, i64, i64, i64, float, float, float, float, double, double) - diff --git 
a/llvm/test/CodeGen/X86/hipe-cc.ll b/llvm/test/CodeGen/X86/hipe-cc.ll index 76d17a09d54e..b34417ebf69b 100644 --- a/llvm/test/CodeGen/X86/hipe-cc.ll +++ b/llvm/test/CodeGen/X86/hipe-cc.ll @@ -49,10 +49,10 @@ entry: store i32 %arg1, i32* %arg1_var store i32 %arg2, i32* %arg2_var - ; CHECK: movl 4(%esp), %edx - ; CHECK-NEXT: movl 8(%esp), %eax + ; CHECK: movl 16(%esp), %esi ; CHECK-NEXT: movl 12(%esp), %ebp - ; CHECK-NEXT: movl 16(%esp), %esi + ; CHECK-NEXT: movl 8(%esp), %eax + ; CHECK-NEXT: movl 4(%esp), %edx %0 = load i32* %hp_var %1 = load i32* %p_var %2 = load i32* %arg0_var diff --git a/llvm/test/CodeGen/X86/hipe-cc64.ll b/llvm/test/CodeGen/X86/hipe-cc64.ll index 5dbb5a25cbeb..27e1c723a8f7 100644 --- a/llvm/test/CodeGen/X86/hipe-cc64.ll +++ b/llvm/test/CodeGen/X86/hipe-cc64.ll @@ -5,10 +5,10 @@ define void @zap(i64 %a, i64 %b) nounwind { entry: ; CHECK: movq %rsi, %rax - ; CHECK-NEXT: movq %rdi, %rsi - ; CHECK-NEXT: movq %rax, %rdx ; CHECK-NEXT: movl $8, %ecx ; CHECK-NEXT: movl $9, %r8d + ; CHECK-NEXT: movq %rdi, %rsi + ; CHECK-NEXT: movq %rax, %rdx ; CHECK-NEXT: callq addfour %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9) %res = extractvalue {i64, i64, i64} %0, 2 @@ -57,11 +57,11 @@ entry: store i64 %arg2, i64* %arg2_var store i64 %arg3, i64* %arg3_var - ; CHECK: movq 8(%rsp), %rcx - ; CHECK-NEXT: movq 16(%rsp), %rdx - ; CHECK-NEXT: movq 24(%rsp), %rsi + ; CHECK: movq 40(%rsp), %r15 ; CHECK-NEXT: movq 32(%rsp), %rbp - ; CHECK-NEXT: movq 40(%rsp), %r15 + ; CHECK-NEXT: movq 24(%rsp), %rsi + ; CHECK-NEXT: movq 16(%rsp), %rdx + ; CHECK-NEXT: movq 8(%rsp), %rcx %0 = load i64* %hp_var %1 = load i64* %p_var %2 = load i64* %arg0_var diff --git a/llvm/test/CodeGen/X86/lea-recursion.ll b/llvm/test/CodeGen/X86/lea-recursion.ll index 3f32fd27c5c1..9480600312ce 100644 --- a/llvm/test/CodeGen/X86/lea-recursion.ll +++ b/llvm/test/CodeGen/X86/lea-recursion.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=x86-64 | grep lea | count 12 +; RUN: llc < %s -march=x86-64 | grep lea | count 13 ; This testcase was written to demonstrate an instruction-selection problem, ; however it also happens to expose a limitation in the DAGCombiner's @@ -44,4 +44,3 @@ entry: store i32 %tmp10.6, i32* getelementptr ([1000 x i32]* @g0, i32 0, i32 7) ret void } - diff --git a/llvm/test/CodeGen/X86/lea.ll b/llvm/test/CodeGen/X86/lea.ll index affd6bf3bb89..93cfe4611b44 100644 --- a/llvm/test/CodeGen/X86/lea.ll +++ b/llvm/test/CodeGen/X86/lea.ll @@ -28,8 +28,7 @@ bb.nph: bb2: ret i32 %x_offs ; CHECK-LABEL: test2: -; CHECK: movl %e[[A0]], %eax -; CHECK: addl $-5, %eax +; CHECK: leal -5(%r[[A0:..]]), %eax ; CHECK: andl $-4, %eax ; CHECK: negl %eax ; CHECK: leal -4(%r[[A0]],%rax), %eax diff --git a/llvm/test/CodeGen/X86/load-slice.ll b/llvm/test/CodeGen/X86/load-slice.ll index b1f778c11721..85fd7f03ef62 100644 --- a/llvm/test/CodeGen/X86/load-slice.ll +++ b/llvm/test/CodeGen/X86/load-slice.ll @@ -17,14 +17,14 @@ ; High slice starts at 4 (base + 4-bytes) and is 4-bytes aligned. ; ; STRESS-LABEL: t1: -; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. -; STRESS: vmovss 68([[BASE:[^)]+]]), [[OUT_Imm:%xmm[0-9]+]] -; Add high slice: out[out_start].imm, this is base + 4. -; STRESS-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] ; Load out[out_start + 8].real, this is base + 8 * 8 + 0. -; STRESS-NEXT: vmovss 64([[BASE]]), [[OUT_Real:%xmm[0-9]+]] +; STRESS: vmovss 64([[BASE:[^(]+]]), [[OUT_Real:%xmm[0-9]+]] ; Add low slice: out[out_start].real, this is base + 0. 
; STRESS-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]] +; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. +; STRESS-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]] +; Add high slice: out[out_start].imm, this is base + 4. +; STRESS-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] ; Swap Imm and Real. ; STRESS-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]] ; Put the results back into out[out_start]. @@ -32,14 +32,14 @@ ; ; Same for REGULAR, we eliminate register bank copy with each slices. ; REGULAR-LABEL: t1: -; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. -; REGULAR: vmovss 68([[BASE:[^)]+]]), [[OUT_Imm:%xmm[0-9]+]] -; Add high slice: out[out_start].imm, this is base + 4. -; REGULAR-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] ; Load out[out_start + 8].real, this is base + 8 * 8 + 0. -; REGULAR-NEXT: vmovss 64([[BASE]]), [[OUT_Real:%xmm[0-9]+]] +; REGULAR: vmovss 64([[BASE:[^)]+]]), [[OUT_Real:%xmm[0-9]+]] ; Add low slice: out[out_start].real, this is base + 0. ; REGULAR-NEXT: vaddss ([[BASE]]), [[OUT_Real]], [[RES_Real:%xmm[0-9]+]] +; Load out[out_start + 8].imm, this is base + 8 * 8 + 4. +; REGULAR-NEXT: vmovss 68([[BASE]]), [[OUT_Imm:%xmm[0-9]+]] +; Add high slice: out[out_start].imm, this is base + 4. +; REGULAR-NEXT: vaddss 4([[BASE]]), [[OUT_Imm]], [[RES_Imm:%xmm[0-9]+]] ; Swap Imm and Real. ; REGULAR-NEXT: vinsertps $16, [[RES_Imm]], [[RES_Real]], [[RES_Vec:%xmm[0-9]+]] ; Put the results back into out[out_start]. @@ -137,4 +137,3 @@ define i32 @t3(%class.Complex* nocapture %out, i64 %out_start) { %res = add i32 %slice32_lowhigh, %tmpres ret i32 %res } - diff --git a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll index c7a318680319..e7d74a924075 100644 --- a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -2,12 +2,12 @@ ; RUN: llc -mtriple=x86_64-darwin -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s ; CHECK-LABEL: t: -; CHECK: decq -; CHECK-NEXT: movl (%r9,%rax,4), %eax +; CHECK: movl (%r9,%rax,4), %e{{..}} +; CHECK-NEXT: decq ; CHECK-NEXT: jne ; ATOM-LABEL: t: -; ATOM: movl (%r9,%r{{.+}},4), %eax +; ATOM: movl (%r9,%r{{.+}},4), %e{{..}} ; ATOM-NEXT: decq ; ATOM-NEXT: jne diff --git a/llvm/test/CodeGen/X86/masked-iv-safe.ll b/llvm/test/CodeGen/X86/masked-iv-safe.ll index c33cac2e05a2..4a4d178f6e41 100644 --- a/llvm/test/CodeGen/X86/masked-iv-safe.ll +++ b/llvm/test/CodeGen/X86/masked-iv-safe.ll @@ -1,15 +1,13 @@ -; RUN: llc < %s -mcpu=generic -march=x86-64 > %t -; RUN: not grep and %t -; RUN: not grep movz %t -; RUN: not grep sar %t -; RUN: not grep shl %t -; RUN: grep add %t | count 5 -; RUN: grep inc %t | count 2 -; RUN: grep lea %t | count 3 +; RUN: llc < %s -mcpu=generic -march=x86-64 | FileCheck %s ; Optimize away zext-inreg and sext-inreg on the loop induction ; variable using trip-count information. 
+; CHECK-LABEL: count_up +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: inc +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @count_up(double* %d, i64 %n) nounwind { entry: br label %loop @@ -38,6 +36,11 @@ return: ret void } +; CHECK-LABEL: count_down +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: addq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @count_down(double* %d, i64 %n) nounwind { entry: br label %loop @@ -66,6 +69,11 @@ return: ret void } +; CHECK-LABEL: count_up_signed +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: inc +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @count_up_signed(double* %d, i64 %n) nounwind { entry: br label %loop @@ -96,6 +104,11 @@ return: ret void } +; CHECK-LABEL: count_down_signed +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: addq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @count_down_signed(double* %d, i64 %n) nounwind { entry: br label %loop @@ -126,6 +139,11 @@ return: ret void } +; CHECK-LABEL: another_count_up +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: addq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @another_count_up(double* %d, i64 %n) nounwind { entry: br label %loop @@ -154,6 +172,11 @@ return: ret void } +; CHECK-LABEL: another_count_down +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: decq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @another_count_down(double* %d, i64 %n) nounwind { entry: br label %loop @@ -182,6 +205,11 @@ return: ret void } +; CHECK-LABEL: another_count_up_signed +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: addq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @another_count_up_signed(double* %d, i64 %n) nounwind { entry: br label %loop @@ -212,6 +240,11 @@ return: ret void } +; CHECK-LABEL: another_count_down_signed +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: decq +; CHECK-NOT: {{and|movz|sar|shl}} +; CHECK: jne define void @another_count_down_signed(double* %d, i64 %n) nounwind { entry: br label %loop diff --git a/llvm/test/CodeGen/X86/memcpy-2.ll b/llvm/test/CodeGen/X86/memcpy-2.ll index c17cc7f7fdd2..6ae7807810e9 100644 --- a/llvm/test/CodeGen/X86/memcpy-2.ll +++ b/llvm/test/CodeGen/X86/memcpy-2.ll @@ -56,15 +56,15 @@ entry: define void @t2(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { entry: ; SSE2-Darwin-LABEL: t2: -; SSE2-Darwin: movaps (%eax), %xmm0 +; SSE2-Darwin: movaps (%ecx), %xmm0 ; SSE2-Darwin: movaps %xmm0, (%eax) ; SSE2-Mingw32-LABEL: t2: -; SSE2-Mingw32: movaps (%eax), %xmm0 +; SSE2-Mingw32: movaps (%ecx), %xmm0 ; SSE2-Mingw32: movaps %xmm0, (%eax) ; SSE1-LABEL: t2: -; SSE1: movaps (%eax), %xmm0 +; SSE1: movaps (%ecx), %xmm0 ; SSE1: movaps %xmm0, (%eax) ; NOSSE-LABEL: t2: @@ -91,14 +91,14 @@ entry: define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { entry: ; SSE2-Darwin-LABEL: t3: -; SSE2-Darwin: movsd (%eax), %xmm0 -; SSE2-Darwin: movsd 8(%eax), %xmm1 +; SSE2-Darwin: movsd (%ecx), %xmm0 +; SSE2-Darwin: movsd 8(%ecx), %xmm1 ; SSE2-Darwin: movsd %xmm1, 8(%eax) ; SSE2-Darwin: movsd %xmm0, (%eax) ; SSE2-Mingw32-LABEL: t3: -; SSE2-Mingw32: movsd (%eax), %xmm0 -; SSE2-Mingw32: movsd 8(%eax), %xmm1 +; SSE2-Mingw32: movsd (%ecx), %xmm0 +; SSE2-Mingw32: movsd 8(%ecx), %xmm1 ; SSE2-Mingw32: movsd %xmm1, 8(%eax) ; SSE2-Mingw32: movsd %xmm0, (%eax) diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll index 169fa33d2293..7bf8a618fa77 100644 --- a/llvm/test/CodeGen/X86/pmul.ll +++ b/llvm/test/CodeGen/X86/pmul.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s 
-march=x86 -mattr=sse4.1 -mcpu=nehalem -stack-alignment=16 > %t ; RUN: grep pmul %t | count 12 -; RUN: grep mov %t | count 11 +; RUN: grep mov %t | count 14 define <4 x i32> @a(<4 x i32> %i) nounwind { %A = mul <4 x i32> %i, < i32 117, i32 117, i32 117, i32 117 > diff --git a/llvm/test/CodeGen/X86/pr14088.ll b/llvm/test/CodeGen/X86/pr14088.ll index 505e3b5cf262..16f20d0500a1 100644 --- a/llvm/test/CodeGen/X86/pr14088.ll +++ b/llvm/test/CodeGen/X86/pr14088.ll @@ -19,7 +19,14 @@ return: ret i32 %retval.0 } -; We were miscompiling this and using %ax instead of %cx in the movw. -; CHECK: movswl %cx, %ecx -; CHECK: movw %cx, (%rsi) -; CHECK: movslq %ecx, %rcx +; We were miscompiling this and using %ax instead of %cx in the movw +; in the following sequence: +; movswl %cx, %ecx +; movw %cx, (%rsi) +; movslq %ecx, %rcx +; +; We can't produce the above sequence without special SD-level +; heuristics. Now we produce this: +; CHECK: movw %ax, (%rsi) +; CHECK: cwtl +; CHECK: cltq diff --git a/llvm/test/CodeGen/X86/pr1505b.ll b/llvm/test/CodeGen/X86/pr1505b.ll index 9b0ef83ab042..c348fec54674 100644 --- a/llvm/test/CodeGen/X86/pr1505b.ll +++ b/llvm/test/CodeGen/X86/pr1505b.ll @@ -57,11 +57,10 @@ entry: %tmp22 = tail call %"struct.std::basic_ostream >"* @_ZNSolsEd( %"struct.std::basic_ostream >"* %tmp16, double %tmp1920 ) ; <%"struct.std::basic_ostream >"*> [#uses=1] %tmp30 = tail call %"struct.std::basic_ostream >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream >"* %tmp22 ) ; <%"struct.std::basic_ostream >"*> [#uses=0] ; reload: -; CHECK: fld -; CHECK: fstps ; CHECK: ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc %tmp34 = tail call %"struct.std::basic_ostream >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) ) ; <%"struct.std::basic_ostream >"*> [#uses=1] %tmp3940 = fpext float %tmp1314 to double ; [#uses=1] +; CHECK: fld ; CHECK: fstpl ; CHECK: ZNSolsEd %tmp42 = tail call %"struct.std::basic_ostream >"* @_ZNSolsEd( %"struct.std::basic_ostream >"* %tmp34, double %tmp3940 ) ; <%"struct.std::basic_ostream >"*> [#uses=1] diff --git a/llvm/test/CodeGen/X86/pr16031.ll b/llvm/test/CodeGen/X86/pr16031.ll index ab0b5efeb93c..ecf6218aeb38 100644 --- a/llvm/test/CodeGen/X86/pr16031.ll +++ b/llvm/test/CodeGen/X86/pr16031.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=corei7-avx -enable-misched=false | FileCheck %s ; CHECK-LABEL: main: ; CHECK: pushl %esi diff --git a/llvm/test/CodeGen/X86/pre-ra-sched.ll b/llvm/test/CodeGen/X86/pre-ra-sched.ll index b792ffa09fb9..70135d43f49b 100644 --- a/llvm/test/CodeGen/X86/pre-ra-sched.ll +++ b/llvm/test/CodeGen/X86/pre-ra-sched.ll @@ -1,5 +1,6 @@ -; RUN: llc < %s -mtriple=x86_64-apple-macosx -debug-only=pre-RA-sched \ -; RUN: 2>&1 | FileCheck %s +; RUN-disabled: llc < %s -mtriple=x86_64-apple-macosx -pre-RA-sched=ilp -debug-only=pre-RA-sched \ +; RUN-disabled: 2>&1 | FileCheck %s +; RUN: true ; REQUIRES: asserts ; ; rdar:13279013: pre-RA-sched should not check all interferences and diff --git a/llvm/test/CodeGen/X86/rdrand.ll b/llvm/test/CodeGen/X86/rdrand.ll index 05fca0ecf705..48182d029eb2 100644 --- a/llvm/test/CodeGen/X86/rdrand.ll +++ b/llvm/test/CodeGen/X86/rdrand.ll @@ -11,10 +11,10 @@ define i32 @_rdrand16_step(i16* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdrand16_step: ; CHECK: rdrandw %ax -; 
CHECK: movw %ax, (%r[[A0:di|cx]]) ; CHECK: movzwl %ax, %ecx ; CHECK: movl $1, %eax ; CHECK: cmovael %ecx, %eax +; CHECK: movw %cx, (%r[[A0:di|cx]]) ; CHECK: ret } @@ -26,9 +26,9 @@ define i32 @_rdrand32_step(i32* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdrand32_step: ; CHECK: rdrandl %e[[T0:[a-z]+]] -; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T0]], %eax +; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: ret } @@ -40,9 +40,9 @@ define i32 @_rdrand64_step(i64* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdrand64_step: ; CHECK: rdrandq %r[[T1:[a-z]+]] -; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T1]], %eax +; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: ret } diff --git a/llvm/test/CodeGen/X86/rdseed.ll b/llvm/test/CodeGen/X86/rdseed.ll index edc5069e5b0f..c219b4ad27ec 100644 --- a/llvm/test/CodeGen/X86/rdseed.ll +++ b/llvm/test/CodeGen/X86/rdseed.ll @@ -12,10 +12,10 @@ define i32 @_rdseed16_step(i16* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdseed16_step: ; CHECK: rdseedw %ax -; CHECK: movw %ax, (%r[[A0:di|cx]]) ; CHECK: movzwl %ax, %ecx ; CHECK: movl $1, %eax ; CHECK: cmovael %ecx, %eax +; CHECK: movw %cx, (%r[[A0:di|cx]]) ; CHECK: ret } @@ -27,9 +27,9 @@ define i32 @_rdseed32_step(i32* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdseed32_step: ; CHECK: rdseedl %e[[T0:[a-z]+]] -; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T0]], %eax +; CHECK: movl %e[[T0]], (%r[[A0]]) ; CHECK: ret } @@ -41,8 +41,8 @@ define i32 @_rdseed64_step(i64* %random_val) { ret i32 %isvalid ; CHECK-LABEL: _rdseed64_step: ; CHECK: rdseedq %r[[T1:[a-z]+]] -; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: movl $1, %eax ; CHECK: cmovael %e[[T1]], %eax +; CHECK: movq %r[[T1]], (%r[[A0]]) ; CHECK: ret } diff --git a/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll b/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll index c2aa61714a72..e17076215d5e 100644 --- a/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll +++ b/llvm/test/CodeGen/X86/segmented-stacks-dynamic.ll @@ -31,7 +31,7 @@ false: ; X32-NEXT: ret ; X32: movl %esp, %eax -; X32-NEXT: subl %ecx, %eax +; X32: subl %ecx, %eax ; X32-NEXT: cmpl %eax, %gs:48 ; X32: movl %eax, %esp @@ -52,7 +52,7 @@ false: ; X64-NEXT: ret ; X64: movq %rsp, %[[RDI:rdi|rax]] -; X64-NEXT: subq %{{.*}}, %[[RDI]] +; X64: subq %{{.*}}, %[[RDI]] ; X64-NEXT: cmpq %[[RDI]], %fs:112 ; X64: movq %[[RDI]], %rsp diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll index 5fe2b70f99a9..cdd258d92031 100644 --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -34,12 +34,12 @@ bb90: ; preds = %bb84, %bb72 bb91: ; preds = %bb84 ret i32 0 ; CHECK-LABEL: test2: -; CHECK: movnew -; CHECK: movswl +; CHECK: cmovnew +; CHECK: cwtl ; ATOM-LABEL: test2: -; ATOM: movnew -; ATOM: movswl +; ATOM: cmovnew +; ATOM: cwtl } declare i1 @return_false() @@ -256,8 +256,8 @@ entry: %call = tail call noalias i8* @_Znam(i64 %D) nounwind noredzone ret i8* %call ; CHECK-LABEL: test12: -; CHECK: movq $-1, %[[R:r..]] ; CHECK: mulq +; CHECK: movq $-1, %[[R:r..]] ; CHECK: cmovnoq %rax, %[[R]] ; CHECK: jmp __Znam diff --git a/llvm/test/CodeGen/X86/shift-bmi2.ll b/llvm/test/CodeGen/X86/shift-bmi2.ll index 01167893a89e..7615754a042a 100644 --- a/llvm/test/CodeGen/X86/shift-bmi2.ll +++ b/llvm/test/CodeGen/X86/shift-bmi2.ll @@ -30,10 +30,11 @@ entry: %x = load i32* %p %shl = shl i32 %x, %shamt ; BMI2: shl32p -; BMI2: shlxl %{{.+}}, ({{.+}}), %{{.+}} +; Source order scheduling 
prevents folding, rdar:14208996. +; BMI2: shlxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI2: ret ; BMI264: shl32p -; BMI264: shlxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: shlxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i32 %shl } @@ -74,7 +75,7 @@ entry: %x = load i64* %p %shl = shl i64 %x, %shamt ; BMI264: shl64p -; BMI264: shlxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: shlxq %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i64 %shl } @@ -106,10 +107,11 @@ entry: %x = load i32* %p %shl = lshr i32 %x, %shamt ; BMI2: lshr32p -; BMI2: shrxl %{{.+}}, ({{.+}}), %{{.+}} +; Source order scheduling prevents folding, rdar:14208996. +; BMI2: shrxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI2: ret ; BMI264: lshr32p -; BMI264: shrxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: shrxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i32 %shl } @@ -128,7 +130,7 @@ entry: %x = load i64* %p %shl = lshr i64 %x, %shamt ; BMI264: lshr64p -; BMI264: shrxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: shrxq %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i64 %shl } @@ -150,10 +152,11 @@ entry: %x = load i32* %p %shl = ashr i32 %x, %shamt ; BMI2: ashr32p -; BMI2: sarxl %{{.+}}, ({{.+}}), %{{.+}} +; Source order scheduling prevents folding, rdar:14208996. +; BMI2: sarxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI2: ret ; BMI264: ashr32p -; BMI264: sarxl %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: sarxl %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i32 %shl } @@ -172,7 +175,7 @@ entry: %x = load i64* %p %shl = ashr i64 %x, %shamt ; BMI264: ashr64p -; BMI264: sarxq %{{.+}}, ({{.+}}), %{{.+}} +; BMI264: sarxq %{{.+}}, %{{.+}}, %{{.+}} ; BMI264: ret ret i64 %shl } diff --git a/llvm/test/CodeGen/X86/sink-hoist.ll b/llvm/test/CodeGen/X86/sink-hoist.ll index 0741635fa5bf..64f5311792db 100644 --- a/llvm/test/CodeGen/X86/sink-hoist.ll +++ b/llvm/test/CodeGen/X86/sink-hoist.ll @@ -26,11 +26,10 @@ define double @foo(double %x, double %y, i1 %c) nounwind { ; CHECK-LABEL: split: ; CHECK-NEXT: testb $1, %dil -; CHECK-NEXT: jne -; CHECK-NEXT: movaps -; CHECK-NEXT: ret +; CHECK-NEXT: je ; CHECK: divsd -; CHECK-NEXT: ret +; CHECK: movaps +; CHECK: ret define double @split(double %x, double %y, i1 %c) nounwind { %a = fdiv double %x, 3.2 %z = select i1 %c, double %a, double %y @@ -65,7 +64,7 @@ return: ; Sink instructions with dead EFLAGS defs. ; FIXME: Unfail the zzz test if we can correctly mark pregs with the kill flag. -; +; ; See . This test isn't valid after we made machine ; sinking more conservative about sinking instructions that define a preg into a ; block when we don't know if the preg is killed within the current block. 
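Illustration (hedged): the shift-bmi2.ll changes earlier in this patch drop the folded memory operand from the shlx/shrx/sarx checks because, under source-order scheduling, the load feeding the shift stays a separate instruction (rdar:14208996). A rough sketch of the shape the shl32p case now matches, assuming the default i386 cdecl argument layout; the registers and stack offsets are illustrative, not taken from the actual test output:

    movl  8(%esp), %ecx       # shift amount %shamt (illustrative registers)
    movl  4(%esp), %eax       # pointer %p
    movl  (%eax), %eax        # the load of %x remains its own instruction
    shlxl %ecx, %eax, %eax    # register form; the memory operand is no longer folded

The 64-bit BMI264 cases change the same way, matching the register forms of shlxq, shrxq, and sarxq.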
diff --git a/llvm/test/CodeGen/X86/sse2.ll b/llvm/test/CodeGen/X86/sse2.ll index 217139a2b75c..9147c22dd375 100644 --- a/llvm/test/CodeGen/X86/sse2.ll +++ b/llvm/test/CodeGen/X86/sse2.ll @@ -7,7 +7,7 @@ define void @test1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 2, i32 1 > store <2 x double> %tmp9, <2 x double>* %r, align 16 ret void - + ; CHECK-LABEL: test1: ; CHECK: movl 8(%esp), %eax ; CHECK-NEXT: movapd (%eax), %xmm0 @@ -23,12 +23,12 @@ define void @test2(<2 x double>* %r, <2 x double>* %A, double %B) nounwind { %tmp9 = shufflevector <2 x double> %tmp3, <2 x double> %tmp7, <2 x i32> < i32 0, i32 2 > store <2 x double> %tmp9, <2 x double>* %r, align 16 ret void - + ; CHECK-LABEL: test2: -; CHECK: movl 8(%esp), %eax -; CHECK-NEXT: movapd (%eax), %xmm0 +; CHECK: movl 4(%esp), %eax +; CHECK: movl 8(%esp), %ecx +; CHECK-NEXT: movapd (%ecx), %xmm0 ; CHECK-NEXT: movhpd 12(%esp), %xmm0 -; CHECK-NEXT: movl 4(%esp), %eax ; CHECK-NEXT: movapd %xmm0, (%eax) ; CHECK-NEXT: ret } @@ -48,7 +48,7 @@ define void @test3(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B) nounwind store <4 x float> %tmp13, <4 x float>* %res ret void ; CHECK: @test3 -; CHECK: unpcklps +; CHECK: unpcklps } define void @test4(<4 x float> %X, <4 x float>* %res) nounwind { @@ -85,9 +85,9 @@ define void @test6(<4 x float>* %res, <4 x float>* %A) nounwind { %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> < i32 0, i32 5, i32 6, i32 7 > ; <<4 x float>> [#uses=1] store <4 x float> %tmp2, <4 x float>* %res ret void - + ; CHECK-LABEL: test6: -; CHECK: movaps (%eax), %xmm0 +; CHECK: movaps (%ecx), %xmm0 ; CHECK: movaps %xmm0, (%eax) } @@ -96,7 +96,7 @@ define void @test7() nounwind { shufflevector <4 x float> %1, <4 x float> zeroinitializer, <4 x i32> zeroinitializer ; <<4 x float>>:2 [#uses=1] store <4 x float> %2, <4 x float>* null ret void - + ; CHECK-LABEL: test7: ; CHECK: xorps %xmm0, %xmm0 ; CHECK: movaps %xmm0, 0 @@ -166,7 +166,7 @@ define void @test13(<4 x float>* %res, <4 x float>* %A, <4 x float>* %B, <4 x fl store <4 x float> %tmp11, <4 x float>* %res ret void ; CHECK: test13 -; CHECK: shufps $69, (%eax), %xmm0 +; CHECK: shufps $69, (%ecx), %xmm0 ; CHECK: pshufd $-40, %xmm0, %xmm0 } @@ -178,8 +178,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind { %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1] ret <4 x float> %tmp27 ; CHECK-LABEL: test14: -; CHECK: subps [[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]] -; CHECK: addps [[X1]], [[X0:%xmm[0-9]+]] +; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]] +; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]] ; CHECK: movlhps [[X2]], [[X0]] } @@ -221,4 +221,3 @@ entry: %double2float.i = fptrunc <4 x double> %0 to <4 x float> ret <4 x float> %double2float.i } - diff --git a/llvm/test/CodeGen/X86/store-narrow.ll b/llvm/test/CodeGen/X86/store-narrow.ll index fab266f7caf4..7557f255658d 100644 --- a/llvm/test/CodeGen/X86/store-narrow.ll +++ b/llvm/test/CodeGen/X86/store-narrow.ll @@ -12,7 +12,7 @@ entry: %D = or i32 %C, %B store i32 %D, i32* %a0, align 4 ret void - + ; X64-LABEL: test1: ; X64: movb %sil, (%rdi) @@ -34,8 +34,8 @@ entry: ; X64: movb %sil, 1(%rdi) ; X32-LABEL: test2: -; X32: movb 8(%esp), %al -; X32: movb %al, 1(%{{.*}}) +; X32: movb 8(%esp), %[[REG:[abcd]l]] +; X32: movb %[[REG]], 1(%{{.*}}) } define void @test3(i32* nocapture %a0, i16 zeroext %a1) nounwind ssp { @@ -67,8 
+67,8 @@ entry: ; X64: movw %si, 2(%rdi) ; X32-LABEL: test4: -; X32: movl 8(%esp), %eax -; X32: movw %ax, 2(%{{.*}}) +; X32: movl 8(%esp), %e[[REG:[abcd]x]] +; X32: movw %[[REG]], 2(%{{.*}}) } define void @test5(i64* nocapture %a0, i16 zeroext %a1) nounwind ssp { @@ -84,8 +84,8 @@ entry: ; X64: movw %si, 2(%rdi) ; X32-LABEL: test5: -; X32: movzwl 8(%esp), %eax -; X32: movw %ax, 2(%{{.*}}) +; X32: movzwl 8(%esp), %e[[REG:[abcd]x]] +; X32: movw %[[REG]], 2(%{{.*}}) } define void @test6(i64* nocapture %a0, i8 zeroext %a1) nounwind ssp { @@ -102,8 +102,8 @@ entry: ; X32-LABEL: test6: -; X32: movb 8(%esp), %al -; X32: movb %al, 5(%{{.*}}) +; X32: movb 8(%esp), %[[REG:[abcd]l]] +; X32: movb %[[REG]], 5(%{{.*}}) } define i32 @test7(i64* nocapture %a0, i8 zeroext %a1, i32* %P2) nounwind { @@ -121,8 +121,8 @@ entry: ; X32-LABEL: test7: -; X32: movb 8(%esp), %cl -; X32: movb %cl, 5(%{{.*}}) +; X32: movb 8(%esp), %[[REG:[abcd]l]] +; X32: movb %[[REG]], 5(%{{.*}}) } ; PR7833 diff --git a/llvm/test/CodeGen/X86/tailcall-largecode.ll b/llvm/test/CodeGen/X86/tailcall-largecode.ll index e9b8721e6608..f5662d97d13f 100644 --- a/llvm/test/CodeGen/X86/tailcall-largecode.ll +++ b/llvm/test/CodeGen/X86/tailcall-largecode.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux-gnu -tailcallopt -code-model=large -enable-misched=false | FileCheck %s declare fastcc i32 @callee(i32 %arg) define fastcc i32 @directcall(i32 %arg) { diff --git a/llvm/test/CodeGen/X86/test-nofold.ll b/llvm/test/CodeGen/X86/test-nofold.ll index 97db1b340e81..19fbaafc194f 100644 --- a/llvm/test/CodeGen/X86/test-nofold.ll +++ b/llvm/test/CodeGen/X86/test-nofold.ll @@ -2,10 +2,10 @@ ; rdar://5752025 ; We want: -; CHECK: movl $42, %ecx -; CHECK-NEXT: movl 4(%esp), %eax -; CHECK-NEXT: andl $15, %eax -; CHECK-NEXT: cmovnel %ecx, %eax +; CHECK: movl 4(%esp), %ecx +; CHECK-NEXT: andl $15, %ecx +; CHECK-NEXT: movl $42, %eax +; CHECK-NEXT: cmovel %ecx, %eax ; CHECK-NEXT: ret ; ; We don't want: @@ -39,4 +39,3 @@ entry: %retval = select i1 %tmp4, i32 %tmp2, i32 42 ; [#uses=1] ret i32 %retval } - diff --git a/llvm/test/CodeGen/X86/trunc-to-bool.ll b/llvm/test/CodeGen/X86/trunc-to-bool.ll index 3711cf1b21fb..0ed634774ab3 100644 --- a/llvm/test/CodeGen/X86/trunc-to-bool.ll +++ b/llvm/test/CodeGen/X86/trunc-to-bool.ll @@ -22,7 +22,7 @@ ret_false: ret i1 false } ; CHECK-LABEL: test2: -; CHECK: btl %eax +; CHECK: btl define i32 @test3(i8* %ptr) nounwind { %val = load i8* %ptr diff --git a/llvm/test/CodeGen/X86/v-binop-widen.ll b/llvm/test/CodeGen/X86/v-binop-widen.ll index 8655c6c8ea54..fca4da66a85e 100644 --- a/llvm/test/CodeGen/X86/v-binop-widen.ll +++ b/llvm/test/CodeGen/X86/v-binop-widen.ll @@ -1,7 +1,7 @@ ; RUN: llc -mcpu=generic -march=x86 -mattr=+sse < %s | FileCheck %s +; CHECK: divps +; CHECK: divps ; CHECK: divss -; CHECK: divps -; CHECK: divps %vec = type <9 x float> define %vec @vecdiv( %vec %p1, %vec %p2) @@ -9,4 +9,3 @@ define %vec @vecdiv( %vec %p1, %vec %p2) %result = fdiv %vec %p1, %p2 ret %vec %result } - diff --git a/llvm/test/CodeGen/X86/v-binop-widen2.ll b/llvm/test/CodeGen/X86/v-binop-widen2.ll index 569586af4983..334211132f14 100644 --- a/llvm/test/CodeGen/X86/v-binop-widen2.ll +++ b/llvm/test/CodeGen/X86/v-binop-widen2.ll @@ -2,9 +2,9 @@ ; RUN: llc -march=x86 -mcpu=atom -mattr=+sse < %s | FileCheck -check-prefix=ATOM %s %vec = type <6 x float> -; CHECK: divss -; CHECK: divss ; CHECK: divps +; CHECK: divss +; CHECK: divss ; Scheduler causes a 
different instruction order to be produced on Intel Atom ; ATOM: divps diff --git a/llvm/test/CodeGen/X86/vec_shuffle-27.ll b/llvm/test/CodeGen/X86/vec_shuffle-27.ll index 8488c2dc5f26..c9b2fb51d78f 100644 --- a/llvm/test/CodeGen/X86/vec_shuffle-27.ll +++ b/llvm/test/CodeGen/X86/vec_shuffle-27.ll @@ -7,10 +7,10 @@ target triple = "i686-apple-cl.1.0" define <8 x float> @my2filter4_1d(<4 x float> %a, <8 x float> %T0, <8 x float> %T1) nounwind readnone { entry: ; CHECK: subps -; CHECK: mulps -; CHECK: addps ; CHECK: subps ; CHECK: mulps +; CHECK: mulps +; CHECK: addps ; CHECK: addps %tmp7 = shufflevector <4 x float> %a, <4 x float> undef, <8 x i32> < i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3 > ; <<8 x float>> [#uses=1] %sub = fsub <8 x float> %T1, %T0 ; <<8 x float>> [#uses=1] diff --git a/llvm/test/CodeGen/X86/vec_shuffle-39.ll b/llvm/test/CodeGen/X86/vec_shuffle-39.ll index 1560454a7166..8fd9a5cd023e 100644 --- a/llvm/test/CodeGen/X86/vec_shuffle-39.ll +++ b/llvm/test/CodeGen/X86/vec_shuffle-39.ll @@ -54,8 +54,8 @@ entry: define <2 x double> @t3() nounwind readonly { bb: ; CHECK-LABEL: t3: -; CHECK: punpcklqdq %xmm1, %xmm0 ; CHECK: movq (%rax), %xmm1 +; CHECK: punpcklqdq %xmm2, %xmm0 ; CHECK: movsd %xmm1, %xmm0 %tmp0 = load i128* null, align 1 %tmp1 = load <2 x i32>* undef, align 8 @@ -72,9 +72,9 @@ bb: define <2 x i64> @t4() nounwind readonly { bb: ; CHECK-LABEL: t4: -; CHECK: punpcklqdq %xmm0, %xmm1 ; CHECK: movq (%rax), %xmm0 -; CHECK: movsd %xmm1, %xmm0 +; CHECK: punpcklqdq %{{xmm.}}, %[[XMM:xmm[0-9]]] +; CHECK: movsd %[[XMM]], %xmm0 %tmp0 = load i128* null, align 1 %tmp1 = load <2 x i32>* undef, align 8 %tmp2 = bitcast i128 %tmp0 to <16 x i8> diff --git a/llvm/test/CodeGen/X86/widen_cast-1.ll b/llvm/test/CodeGen/X86/widen_cast-1.ll index ac4d036b660d..d115929f5aab 100644 --- a/llvm/test/CodeGen/X86/widen_cast-1.ll +++ b/llvm/test/CodeGen/X86/widen_cast-1.ll @@ -1,8 +1,8 @@ ; RUN: llc -march=x86 -mcpu=generic -mattr=+sse4.2 < %s | FileCheck %s ; RUN: llc -march=x86 -mcpu=atom < %s | FileCheck -check-prefix=ATOM %s -; CHECK: paddd ; CHECK: movl +; CHECK: paddd ; CHECK: movlpd ; Scheduler causes produce a different instruction order diff --git a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll index 9752edb912cd..aff53057a954 100644 --- a/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll +++ b/llvm/test/CodeGen/X86/win64_alloca_dynalloca.ll @@ -1,6 +1,6 @@ -; RUN: llc < %s -mcpu=generic -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64 -; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64 -; RUN: llc < %s -mcpu=generic -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI +; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-mingw32 | FileCheck %s -check-prefix=M64 +; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32 | FileCheck %s -check-prefix=W64 +; RUN: llc < %s -mcpu=generic -enable-misched=false -mtriple=x86_64-win32-macho | FileCheck %s -check-prefix=EFI ; PR8777 ; PR8778 @@ -52,18 +52,18 @@ entry: %r = call i64 @bar(i64 %n, i64 %x, i64 %n, i8* %buf0, i8* %buf1) nounwind ; M64: subq $48, %rsp -; M64: leaq -4096(%rbp), %r9 ; M64: movq %rax, 32(%rsp) +; M64: leaq -4096(%rbp), %r9 ; M64: callq bar ; W64: subq $48, %rsp -; W64: leaq -4096(%rbp), %r9 ; W64: movq %rax, 32(%rsp) +; W64: leaq -4096(%rbp), %r9 ; W64: callq bar ; EFI: subq $48, %rsp -; EFI: leaq -[[B0OFS]](%rbp), %r9 ; EFI: movq [[R64]], 32(%rsp) +; EFI: leaq -[[B0OFS]](%rbp), %r9 ; 
EFI: callq _bar ret i64 %r diff --git a/llvm/test/CodeGen/X86/x86-64-psub.ll b/llvm/test/CodeGen/X86/x86-64-psub.ll index be09a4fcb824..183ddf446f3d 100644 --- a/llvm/test/CodeGen/X86/x86-64-psub.ll +++ b/llvm/test/CodeGen/X86/x86-64-psub.ll @@ -4,8 +4,8 @@ ; This test checks that the operands of packed sub instructions are ; never interchanged by the "Two-Address instruction pass". -declare { i64, double } @getFirstParam() -declare { i64, double } @getSecondParam() +declare { i64, double } @getFirstParam() +declare { i64, double } @getSecondParam() define i64 @test_psubb() { entry: @@ -28,9 +28,10 @@ entry: ; CHECK-LABEL: test_psubb: ; CHECK: callq getFirstParam +; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubb [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -55,9 +56,10 @@ entry: ; CHECK-LABEL: test_psubw: ; CHECK: callq getFirstParam +; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubw [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -83,9 +85,10 @@ entry: ; CHECK-LABEL: test_psubd: ; CHECK: callq getFirstParam +; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubd [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -110,9 +113,10 @@ entry: ; CHECK-LABEL: test_psubsb: ; CHECK: callq getFirstParam +; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubsb [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -137,9 +141,10 @@ entry: ; CHECK-LABEL: test_psubswv: ; CHECK: callq getFirstParam +; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubsw [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -164,9 +169,10 @@ entry: ; CHECK-LABEL: test_psubusbv: ; CHECK: callq getFirstParam +; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubusb [[PARAM2]], [[PARAM1]] ; CHECK: ret @@ -191,9 +197,10 @@ entry: ; CHECK-LABEL: test_psubuswv: ; CHECK: callq getFirstParam +; CHECK: movq %rax, [[TEMP:%[a-z0-9]+]] ; CHECK: callq getSecondParam +; CHECK: movd [[TEMP]], [[PARAM1:%[a-z0-9]+]] ; CHECK: movd %rax, [[PARAM2:%[a-z0-9]+]] -; CHECK: movq (%rsp), [[PARAM1:%[a-z0-9]+]] ; CHECK: psubusw [[PARAM2]], [[PARAM1]] ; CHECK: ret diff --git a/llvm/test/CodeGen/X86/x86-shifts.ll b/llvm/test/CodeGen/X86/x86-shifts.ll index af57e5cc5e26..2f3adb8db9a0 100644 --- a/llvm/test/CodeGen/X86/x86-shifts.ll +++ b/llvm/test/CodeGen/X86/x86-shifts.ll @@ -6,8 +6,8 @@ define <4 x i32> @shl4(<4 x i32> %A) nounwind { entry: ; CHECK: shl4 -; CHECK: padd ; CHECK: pslld +; CHECK: padd ; CHECK: ret %B = shl <4 x i32> %A, < i32 2, i32 2, i32 2, i32 2> %C = shl <4 x i32> %A, < i32 1, i32 1, i32 1, i32 1> @@ -67,8 +67,8 @@ entry: define <8 x i16> @shl8(<8 x i16> %A) nounwind { entry: ; CHECK: shl8 -; CHECK: padd ; 
CHECK: psllw +; CHECK: padd ; CHECK: ret %B = shl <8 x i16> %A, < i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2> %C = shl <8 x i16> %A, < i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> diff --git a/llvm/test/CodeGen/X86/zext-fold.ll b/llvm/test/CodeGen/X86/zext-fold.ll index ff93c68ff35a..a10923f7a80f 100644 --- a/llvm/test/CodeGen/X86/zext-fold.ll +++ b/llvm/test/CodeGen/X86/zext-fold.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=generic -march=x86 | FileCheck %s +; RUN: llc < %s -mcpu=generic -march=x86 -enable-misched=false | FileCheck %s ;; Simple case define i32 @test1(i8 %x) nounwind readnone { @@ -10,7 +10,7 @@ define i32 @test1(i8 %x) nounwind readnone { ; CHECK: movzbl ; CHECK-NEXT: andl {{.*}}224 -;; Multiple uses of %x but easily extensible. +;; Multiple uses of %x but easily extensible. define i32 @test2(i8 %x) nounwind readnone { %A = and i8 %x, -32 %B = zext i8 %A to i32 @@ -21,8 +21,8 @@ define i32 @test2(i8 %x) nounwind readnone { } ; CHECK: test2 ; CHECK: movzbl -; CHECK: orl $63 ; CHECK: andl $224 +; CHECK: orl $63 declare void @use(i32, i8) diff --git a/llvm/test/CodeGen/X86/zext-sext.ll b/llvm/test/CodeGen/X86/zext-sext.ll index 25dabbec2135..5b2713dc6fc1 100644 --- a/llvm/test/CodeGen/X86/zext-sext.ll +++ b/llvm/test/CodeGen/X86/zext-sext.ll @@ -34,10 +34,10 @@ entry: %tmp12 = add i64 %tmp11, 5089792279245435153 ; CHECK: addl $2138875574, %e[[REGISTER_zext:[a-z0-9]+]] -; CHECK-NEXT: cmpl $-8608074, %e[[REGISTER_zext]] -; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_tmp:%r[a-z0-9]+]] -; CHECK: movq [[REGISTER_tmp]], [[REGISTER_sext:%r[a-z0-9]+]] +; CHECK: movslq %e[[REGISTER_zext]], [[REGISTER_sext:%r[a-z0-9]+]] +; CHECK: cmpl $-8608074, %e[[REGISTER_zext]] ; CHECK-NOT: [[REGISTER_zext]] +; CHECK-DAG: testl %e[[REGISTER_zext]] ; CHECK: subq %r[[REGISTER_zext]], [[REGISTER_sext]] %tmp13 = sub i64 %tmp12, 2138875574 diff --git a/llvm/test/DebugInfo/X86/dbg-value-dag-combine.ll b/llvm/test/DebugInfo/X86/dbg-value-dag-combine.ll index 17cbf9f796db..c07dcf873a2c 100644 --- a/llvm/test/DebugInfo/X86/dbg-value-dag-combine.ll +++ b/llvm/test/DebugInfo/X86/dbg-value-dag-combine.ll @@ -16,7 +16,7 @@ entry: call void @llvm.dbg.value(metadata !12, i64 0, metadata !13), !dbg !14 %tmp2 = load i32 addrspace(1)* %ip, align 4, !dbg !15 %tmp3 = add i32 0, %tmp2, !dbg !15 -; CHECK: ##DEBUG_VALUE: idx <- EAX{{$}} +; CHECK: ##DEBUG_VALUE: idx <- E{{..$}} call void @llvm.dbg.value(metadata !{i32 %tmp3}, i64 0, metadata !13), !dbg !15 %arrayidx = getelementptr i32 addrspace(1)* %ip, i32 %1, !dbg !16