From 8fdafb7dced812b2dc0af77f9668bfe23b4ffb0b Mon Sep 17 00:00:00 2001 From: "Liu, Chen3" Date: Thu, 16 Jan 2020 10:49:59 +0800 Subject: [PATCH] Insert wait instruction after X87 instructions which could raise float-point exception. This patch also modify some mayRaiseFPException flag which set in D68854. Differential Revision: https://reviews.llvm.org/D72750 --- llvm/lib/Target/X86/CMakeLists.txt | 1 + llvm/lib/Target/X86/X86.h | 4 + llvm/lib/Target/X86/X86FloatingPoint.cpp | 3 + llvm/lib/Target/X86/X86InsertWait.cpp | 151 ++++++++++++++++++ llvm/lib/Target/X86/X86InstrFPStack.td | 7 +- llvm/lib/Target/X86/X86TargetMachine.cpp | 1 + llvm/test/CodeGen/X86/O0-pipeline.ll | 1 + llvm/test/CodeGen/X86/O3-pipeline.ll | 1 + .../CodeGen/X86/constrained-fp80-trunc-ext.ll | 4 + llvm/test/CodeGen/X86/fp-intrinsics.ll | 81 ++++++++++ .../CodeGen/X86/fp-strict-libcalls-msvc32.ll | 18 +++ llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll | 112 +++++++++++++ .../CodeGen/X86/fp-strict-scalar-fptoint.ll | 33 ++++ .../CodeGen/X86/fp-strict-scalar-inttofp.ll | 61 +++++++ .../CodeGen/X86/fp-strict-scalar-round.ll | 20 +++ llvm/test/CodeGen/X86/fp-strict-scalar.ll | 32 ++++ llvm/test/CodeGen/X86/fp128-cast-strict.ll | 10 ++ .../CodeGen/X86/fp80-strict-scalar-cmp.ll | 56 +++++++ llvm/test/CodeGen/X86/fp80-strict-scalar.ll | 61 +++++++ llvm/test/CodeGen/X86/vec-strict-128.ll | 6 + .../CodeGen/X86/vec-strict-fptoint-128.ll | 42 +++++ .../CodeGen/X86/vec-strict-fptoint-256.ll | 30 ++++ .../CodeGen/X86/vec-strict-fptoint-512.ll | 18 +++ .../CodeGen/X86/vec-strict-inttofp-128.ll | 12 ++ .../CodeGen/X86/vec-strict-inttofp-256.ll | 7 + .../CodeGen/X86/vec-strict-inttofp-512.ll | 14 ++ .../X86/vector-constrained-fp-intrinsics.ll | 28 ++++ 27 files changed, 811 insertions(+), 3 deletions(-) create mode 100644 llvm/lib/Target/X86/X86InsertWait.cpp diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt index 58f2292dd4cd..3f0d68c0c788 100644 --- 
a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -69,6 +69,7 @@ set(sources X86VZeroUpper.cpp X86WinAllocaExpander.cpp X86WinEHState.cpp + X86InsertWait.cpp ) add_llvm_target(X86CodeGen ${sources}) diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 0481a40d462a..604438f83531 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -129,6 +129,10 @@ FunctionPass *createX86DiscriminateMemOpsPass(); /// This pass applies profiling information to insert cache prefetches. FunctionPass *createX86InsertPrefetchPass(); +/// This pass insert wait instruction after X87 instructions which could raise +/// fp exceptions when strict-fp enabled. +FunctionPass *createX86InsertX87waitPass(); + InstructionSelector *createX86InstructionSelector(const X86TargetMachine &TM, X86Subtarget &, X86RegisterBankInfo &); diff --git a/llvm/lib/Target/X86/X86FloatingPoint.cpp b/llvm/lib/Target/X86/X86FloatingPoint.cpp index 13bbd6ccfce4..e6ee46957500 100644 --- a/llvm/lib/Target/X86/X86FloatingPoint.cpp +++ b/llvm/lib/Target/X86/X86FloatingPoint.cpp @@ -1364,6 +1364,9 @@ void FPS::handleTwoArgFP(MachineBasicBlock::iterator &I) { MBB->remove(&*I++); I = BuildMI(*MBB, I, dl, TII->get(Opcode)).addReg(getSTReg(NotTOS)); + if (!MI.mayRaiseFPException()) + I->setFlag(MachineInstr::MIFlag::NoFPExcept); + // If both operands are killed, pop one off of the stack in addition to // overwriting the other one. if (KillsOp0 && KillsOp1 && Op0 != Op1) { diff --git a/llvm/lib/Target/X86/X86InsertWait.cpp b/llvm/lib/Target/X86/X86InsertWait.cpp new file mode 100644 index 000000000000..a82d98d88b30 --- /dev/null +++ b/llvm/lib/Target/X86/X86InsertWait.cpp @@ -0,0 +1,151 @@ +//- X86Insertwait.cpp - Strict-Fp:Insert wait instruction X87 instructions --// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the pass which inserts x86 wait instructions after X87
+// instructions when strict floating point is enabled.
+//
+// The logic to insert a wait instruction after an X87 instruction is as below:
+// 1. If the X87 instruction neither raises a float exception nor loads/stores,
+//    or it is an X87 control instruction, don't insert wait.
+// 2. If the instruction following the X87 instruction is itself an X87
+//    exception-synchronizing X87 instruction, don't insert wait.
+// 3. For other situations, insert a wait instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86.h"
+#include "X86InstrInfo.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "x86-insert-wait"
+
+namespace {
+/// Machine function pass that inserts X86 WAIT instructions after X87 code.
+class WaitInsert : public MachineFunctionPass {
+public:
+  static char ID;
+
+  WaitInsert() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  StringRef getPassName() const override {
+    return "X86 insert wait instruction";
+  }
+
+private:
+  const TargetInstrInfo *TII; // Instruction info; set in runOnMachineFunction.
+};
+
+} // namespace
+
+char WaitInsert::ID = 0;
+
+FunctionPass *llvm::createX86InsertX87waitPass() { return new WaitInsert(); }
+
+/// Return true if Reg is an X87 register (FPCW, FPSW, or ST0 through ST7).
+static bool isX87Reg(unsigned Reg) {
+  return (Reg == X86::FPCW || Reg == X86::FPSW ||
+          (Reg >= X86::ST0 && Reg <= X86::ST7));
+}
+
+/// Return true if any register operand of MI is an X87 register.
+static bool isX87Instruction(const MachineInstr &MI) {
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isReg() && isX87Reg(MO.getReg()))
+      return true;
+  }
+  return false;
+}
+
+/// Return true if MI is an X87 control instruction (the list includes WAIT).
+static bool isX87ControlInstruction(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case X86::FNINIT:
+  case X86::FLDCW16m:
+  case X86::FNSTCW16m:
+  case X86::FNSTSW16r:
+  case X86::FNSTSWm:
+  case X86::FNCLEX:
+  case X86::FLDENVm:
+  case X86::FSTENVm:
+  case X86::FRSTORm:
+  case X86::FSAVEm:
+  case X86::FINCSTP:
+  case X86::FDECSTP:
+  case X86::FFREE:
+  case X86::FFREEP:
+  case X86::FNOP:
+  case X86::WAIT:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// Return true if MI is one of the few special control instructions that
+/// don't perform a wait operation.
+static bool isX87NonWaitingControlInstruction(const MachineInstr &MI) {
+  switch (MI.getOpcode()) {
+  case X86::FNINIT:
+  case X86::FNSTSW16r:
+  case X86::FNSTSWm:
+  case X86::FNSTCW16m:
+  case X86::FNCLEX:
+    return true;
+  default:
+    return false;
+  }
+}
+
+/// For strictfp functions, insert WAIT where needed after X87 instructions
+/// that may raise an FP exception or load/store. Returns true on any change.
+bool WaitInsert::runOnMachineFunction(MachineFunction &MF) {
+  if (!MF.getFunction().hasFnAttribute(Attribute::StrictFP))
+    return false;
+
+  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+  TII = ST.getInstrInfo();
+  bool Changed = false;
+
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineBasicBlock::iterator MI = MBB.begin(); MI != MBB.end(); ++MI) {
+      // Skip non-X87 instructions.
+      if (!isX87Instruction(*MI))
+        continue;
+      // Skip instructions that neither raise an FP exception nor load/store,
+      // as well as X87 control instructions: no wait is inserted after them.
+      if (!(MI->mayRaiseFPException() || MI->mayLoadOrStore()) ||
+          isX87ControlInstruction(*MI))
+        continue;
+      // If the following instruction is an X87 instruction and isn't an X87
+      // non-waiting control instruction, the wait instruction can be omitted.
+      MachineBasicBlock::iterator AfterMI = std::next(MI);
+      if (AfterMI != MBB.end() && isX87Instruction(*AfterMI) &&
+          !isX87NonWaitingControlInstruction(*AfterMI))
+        continue;
+
+      BuildMI(MBB, AfterMI, MI->getDebugLoc(), TII->get(X86::WAIT));
+      LLVM_DEBUG(dbgs() << "\nInsert wait after:\t" << *MI);
+      ++MI; // Step over the newly inserted wait.
+      Changed = true;
+    }
+  }
+  return Changed;
+}
diff --git a/llvm/lib/Target/X86/X86InstrFPStack.td b/llvm/lib/Target/X86/X86InstrFPStack.td index 1830262205c6..677859ef0805 100644 --- a/llvm/lib/Target/X86/X86InstrFPStack.td +++ b/llvm/lib/Target/X86/X86InstrFPStack.td @@ -601,6 +601,7 @@ let SchedRW = [WriteMove], Uses = [FPCW] in { def LD_Frr : FPI<0xD9, MRM0r, (outs), (ins RSTi:$op), "fld\t$op">; def ST_Frr : FPI<0xDD, MRM2r, (outs), (ins RSTi:$op), "fst\t$op">; def ST_FPrr : FPI<0xDD, MRM3r, (outs), (ins RSTi:$op), "fstp\t$op">; +let mayRaiseFPException = 0 in def XCH_F : FPI<0xD9, MRM1r, (outs), (ins RSTi:$op), "fxch\t$op">; } @@ -620,13 +621,13 @@ def LD_Fp180 : FpI_<(outs RFP80:$dst), (ins), ZeroArgFP, [(set RFP80:$dst, fpimm1)]>; } -let SchedRW = [WriteFLD0], Uses = [FPCW] in +let SchedRW = [WriteFLD0], Uses = [FPCW], mayRaiseFPException = 0 in def LD_F0 : FPI<0xD9, MRM_EE, (outs), (ins), "fldz">; -let SchedRW = [WriteFLD1], Uses = [FPCW] in +let SchedRW = [WriteFLD1], Uses = [FPCW], mayRaiseFPException = 0 in def LD_F1 : FPI<0xD9, MRM_E8, (outs), (ins), "fld1">; -let SchedRW = [WriteFLDC], Defs = [FPSW], Uses = [FPCW] in { +let SchedRW = [WriteFLDC], Defs = [FPSW], Uses = [FPCW], mayRaiseFPException = 0 in { def FLDL2T : I<0xD9, MRM_E9, (outs), (ins), "fldl2t", []>; def FLDL2E : I<0xD9, MRM_EA, (outs), (ins), "fldl2e", []>; def FLDPI : I<0xD9, MRM_EB, (outs), (ins),
"fldpi", []>; diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 8c696e9adbed..22b4e2805a5e 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -519,6 +519,7 @@ void X86PassConfig::addPreEmitPass() { } addPass(createX86DiscriminateMemOpsPass()); addPass(createX86InsertPrefetchPass()); + addPass(createX86InsertX87waitPass()); } void X86PassConfig::addPreEmitPass2() { diff --git a/llvm/test/CodeGen/X86/O0-pipeline.ll b/llvm/test/CodeGen/X86/O0-pipeline.ll index 33ecad677a63..5dfe02147e13 100644 --- a/llvm/test/CodeGen/X86/O0-pipeline.ll +++ b/llvm/test/CodeGen/X86/O0-pipeline.ll @@ -65,6 +65,7 @@ ; CHECK-NEXT: X86 vzeroupper inserter ; CHECK-NEXT: X86 Discriminate Memory Operands ; CHECK-NEXT: X86 Insert Cache Prefetches +; CHECK-NEXT: X86 insert wait instruction ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis diff --git a/llvm/test/CodeGen/X86/O3-pipeline.ll b/llvm/test/CodeGen/X86/O3-pipeline.ll index 575b704b8b4c..3d96753b553f 100644 --- a/llvm/test/CodeGen/X86/O3-pipeline.ll +++ b/llvm/test/CodeGen/X86/O3-pipeline.ll @@ -174,6 +174,7 @@ ; CHECK-NEXT: Compressing EVEX instrs to VEX encoding when possible ; CHECK-NEXT: X86 Discriminate Memory Operands ; CHECK-NEXT: X86 Insert Cache Prefetches +; CHECK-NEXT: X86 insert wait instruction ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis diff --git a/llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll b/llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll index 9c408c70cfbf..efa86e241bad 100644 --- a/llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll +++ b/llvm/test/CodeGen/X86/constrained-fp80-trunc-ext.ll @@ -6,6 +6,7 @@ define x86_fp80 @constrained_fpext_f32_as_fp80(float %mem) #0 { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) ; 
CHECK-NEXT: flds -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: retq entry: %ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32( @@ -19,6 +20,7 @@ define float @constrained_fptrunc_f80_to_f32(x86_fp80 %reg) #0 { ; CHECK: # %bb.0: ; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) ; CHECK-NEXT: fstps -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; CHECK-NEXT: retq %trunc = call float @llvm.experimental.constrained.fptrunc.f32.f80( @@ -33,6 +35,7 @@ define x86_fp80 @constrained_fpext_f64_to_f80(double %mem) #0 { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: retq entry: %ext = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f64( @@ -46,6 +49,7 @@ define double @constrained_fptrunc_f80_to_f64(x86_fp80 %reg) #0 { ; CHECK: # %bb.0: ; CHECK-NEXT: fldt {{[0-9]+}}(%rsp) ; CHECK-NEXT: fstpl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: retq %trunc = call double @llvm.experimental.constrained.fptrunc.f64.f80( diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll index e00248b22df8..27f198168e38 100644 --- a/llvm/test/CodeGen/X86/fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll @@ -19,6 +19,7 @@ define double @f1() #0 { ; X87: # %bb.0: # %entry ; X87-NEXT: fld1 ; X87-NEXT: fdivs {{\.LCPI.*}} +; X87-NEXT: wait ; X87-NEXT: retl ; ; X86-SSE-LABEL: f1: @@ -29,6 +30,7 @@ define double @f1() #0 { ; X86-SSE-NEXT: divsd {{\.LCPI.*}}, %xmm0 ; X86-SSE-NEXT: movsd %xmm0, (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -66,6 +68,7 @@ define double @f2(double %a) #0 { ; X87: # %bb.0: # %entry ; X87-NEXT: fldz ; X87-NEXT: fsubrl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl ; ; X86-SSE-LABEL: f2: @@ -77,6 +80,7 @@ define 
double @f2(double %a) #0 { ; X86-SSE-NEXT: subsd %xmm1, %xmm0 ; X86-SSE-NEXT: movsd %xmm0, (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -119,6 +123,7 @@ define double @f3(double %a, double %b) #0 { ; X87-NEXT: fsubl {{[0-9]+}}(%esp) ; X87-NEXT: fmull {{[0-9]+}}(%esp) ; X87-NEXT: fsubrp %st, %st(1) +; X87-NEXT: wait ; X87-NEXT: retl ; ; X86-SSE-LABEL: f3: @@ -132,6 +137,7 @@ define double @f3(double %a, double %b) #0 { ; X86-SSE-NEXT: subsd %xmm1, %xmm0 ; X86-SSE-NEXT: movsd %xmm0, (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -185,11 +191,13 @@ define double @f4(i32 %n, double %a) #0 { ; X87-LABEL: f4: ; X87: # %bb.0: # %entry ; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: cmpl $0, {{[0-9]+}}(%esp) ; X87-NEXT: jle .LBB3_2 ; X87-NEXT: # %bb.1: # %if.then ; X87-NEXT: fld1 ; X87-NEXT: faddp %st, %st(1) +; X87-NEXT: wait ; X87-NEXT: .LBB3_2: # %if.end ; X87-NEXT: retl ; @@ -205,6 +213,7 @@ define double @f4(i32 %n, double %a) #0 { ; X86-SSE-NEXT: .LBB3_2: # %if.end ; X86-SSE-NEXT: movsd %xmm0, (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -248,6 +257,7 @@ define double @f5() #0 { ; X87: # %bb.0: # %entry ; X87-NEXT: flds {{\.LCPI.*}} ; X87-NEXT: fsqrt +; X87-NEXT: wait ; X87-NEXT: retl ; ; X86-SSE-LABEL: f5: @@ -258,6 +268,7 @@ define double @f5() #0 { ; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0 ; X86-SSE-NEXT: movsd %xmm0, (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -290,6 +301,7 @@ define double @f6() #0 { ; X87-NEXT: fstpl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{\.LCPI.*}} ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll pow ; X87-NEXT: 
addl $28, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -345,6 +357,7 @@ define double @f7() #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: fldl {{\.LCPI.*}} ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: movl $3, {{[0-9]+}}(%esp) ; X87-NEXT: calll __powidf2 ; X87-NEXT: addl $12, %esp @@ -400,6 +413,7 @@ define double @f8() #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: flds {{\.LCPI.*}} ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll sin ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -450,6 +464,7 @@ define double @f9() #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: flds {{\.LCPI.*}} ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll cos ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -500,6 +515,7 @@ define double @f10() #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: flds {{\.LCPI.*}} ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll exp ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -550,6 +566,7 @@ define double @f11() #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: fldl {{\.LCPI.*}} ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll exp2 ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -600,6 +617,7 @@ define double @f12() #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: flds {{\.LCPI.*}} ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll log ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -650,6 +668,7 @@ define double @f13() #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: flds {{\.LCPI.*}} ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll log10 ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -700,6 +719,7 @@ define double @f14() #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: flds {{\.LCPI.*}} ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll log2 ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -750,6 +770,7 @@ define double @f15() #0 
{ ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: fldl {{\.LCPI.*}} ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll rint ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -797,6 +818,7 @@ define double @f16() #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: fldl {{\.LCPI.*}} ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll nearbyint ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -843,6 +865,7 @@ define double @f19() #0 { ; X87-NEXT: .cfi_def_cfa_offset 32 ; X87-NEXT: flds {{\.LCPI.*}} ; X87-NEXT: fstpl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: movl $1072693248, {{[0-9]+}}(%esp) # imm = 0x3FF00000 ; X87-NEXT: movl $0, (%esp) ; X87-NEXT: calll fmod @@ -904,6 +927,7 @@ define i8 @f20s8(double %x) #0 { ; X87-NEXT: subl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 12 ; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: orl $3072, %eax # imm = 0xC00 @@ -950,6 +974,7 @@ define i16 @f20s16(double %x) #0 { ; X87-NEXT: subl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 12 ; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: orl $3072, %eax # imm = 0xC00 @@ -994,6 +1019,7 @@ define i32 @f20s(double %x) #0 { ; X87-NEXT: subl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 12 ; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: fnstcw (%esp) ; X87-NEXT: movzwl (%esp), %eax ; X87-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1036,6 +1062,7 @@ define i64 @f20s64(double %x) #0 { ; X87-NEXT: subl $20, %esp ; X87-NEXT: .cfi_def_cfa_offset 24 ; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1056,6 +1083,7 @@ define i64 @f20s64(double %x) #0 { ; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X86-SSE-NEXT: movsd %xmm0, 
{{[0-9]+}}(%esp) ; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-SSE-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1096,6 +1124,7 @@ define i128 @f20s128(double %x) nounwind strictfp { ; X87-NEXT: movl {{[0-9]+}}(%esp), %esi ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fstpl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-NEXT: movl %eax, (%esp) ; X87-NEXT: calll __fixdfti @@ -1170,6 +1199,7 @@ define i8 @f20u8(double %x) #0 { ; X87-NEXT: subl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 12 ; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1215,6 +1245,7 @@ define i16 @f20u16(double %x) #0 { ; X87-NEXT: subl $8, %esp ; X87-NEXT: .cfi_def_cfa_offset 12 ; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: fnstcw (%esp) ; X87-NEXT: movzwl (%esp), %eax ; X87-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1262,6 +1293,7 @@ define i32 @f20u(double %x) #0 { ; X87-NEXT: subl $20, %esp ; X87-NEXT: .cfi_def_cfa_offset 24 ; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1325,14 +1357,17 @@ define i64 @f20u64(double %x) #0 { ; X87-NEXT: .cfi_def_cfa_offset 24 ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: flds {{\.LCPI.*}} +; X87-NEXT: wait ; X87-NEXT: xorl %edx, %edx ; X87-NEXT: fcomi %st(1), %st +; X87-NEXT: wait ; X87-NEXT: setbe %dl ; X87-NEXT: fldz ; X87-NEXT: fxch %st(1) ; X87-NEXT: fcmovnbe %st(1), %st ; X87-NEXT: fstp %st(1) ; X87-NEXT: fsubrp %st, %st(1) +; X87-NEXT: wait ; X87-NEXT: fnstcw {{[0-9]+}}(%esp) ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1363,6 +1398,7 @@ define i64 @f20u64(double %x) #0 { ; X86-SSE-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) 
; X86-SSE-NEXT: setbe %al ; X86-SSE-NEXT: fldl {{[0-9]+}}(%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X86-SSE-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -1435,6 +1471,7 @@ define i128 @f20u128(double %x) nounwind strictfp { ; X87-NEXT: movl {{[0-9]+}}(%esp), %esi ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fstpl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-NEXT: movl %eax, (%esp) ; X87-NEXT: calll __fixunsdfti @@ -1509,6 +1546,7 @@ define float @f21() #0 { ; X87-NEXT: fldl {{\.LCPI.*}} ; X87-NEXT: fstps (%esp) ; X87-NEXT: flds (%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -1521,6 +1559,7 @@ define float @f21() #0 { ; X86-SSE-NEXT: cvtsd2ss %xmm0, %xmm0 ; X86-SSE-NEXT: movss %xmm0, (%esp) ; X86-SSE-NEXT: flds (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: popl %eax ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -1548,6 +1587,7 @@ define double @f22(float %x) #0 { ; X87-LABEL: f22: ; X87: # %bb.0: # %entry ; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl ; ; X86-SSE-LABEL: f22: @@ -1558,6 +1598,7 @@ define double @f22(float %x) #0 { ; X86-SSE-NEXT: cvtss2sd %xmm0, %xmm0 ; X86-SSE-NEXT: movsd %xmm0, (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -1584,6 +1625,7 @@ define i32 @f23(double %x) #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll lrint ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -1631,6 +1673,7 @@ define i32 @f24(float %x) #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fstps (%esp) +; X87-NEXT: wait ; X87-NEXT: calll lrintf ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -1678,6 +1721,7 @@ define i64 
@f25(double %x) #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll llrint ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -1772,6 +1816,7 @@ define i32 @f27(double %x) #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll lround ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -1818,6 +1863,7 @@ define i32 @f28(float %x) #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fstps (%esp) +; X87-NEXT: wait ; X87-NEXT: calll lroundf ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -1864,6 +1910,7 @@ define i64 @f29(double %x) #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll llround ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -1910,6 +1957,7 @@ define i64 @f30(float %x) #0 { ; X87-NEXT: .cfi_def_cfa_offset 16 ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fstps (%esp) +; X87-NEXT: wait ; X87-NEXT: calll llroundf ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 @@ -1960,6 +2008,7 @@ define double @sifdb(i8 %x) #0 { ; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -1972,6 +2021,7 @@ define double @sifdb(i8 %x) #0 { ; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0 ; X86-SSE-NEXT: movsd %xmm0, (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2002,6 +2052,7 @@ define double @sifdw(i16 %x) #0 { ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: 
.cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -2014,6 +2065,7 @@ define double @sifdw(i16 %x) #0 { ; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0 ; X86-SSE-NEXT: movsd %xmm0, (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2044,6 +2096,7 @@ define double @sifdi(i32 %x) #0 { ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movl %eax, (%esp) ; X87-NEXT: fildl (%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -2055,6 +2108,7 @@ define double @sifdi(i32 %x) #0 { ; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 ; X86-SSE-NEXT: movsd %xmm0, (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2083,6 +2137,7 @@ define float @siffb(i8 %x) #0 { ; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -2095,6 +2150,7 @@ define float @siffb(i8 %x) #0 { ; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0 ; X86-SSE-NEXT: movss %xmm0, (%esp) ; X86-SSE-NEXT: flds (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: popl %eax ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2125,6 +2181,7 @@ define float @siffw(i16 %x) #0 { ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -2137,6 +2194,7 @@ define float @siffw(i16 %x) #0 { ; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0 ; X86-SSE-NEXT: movss %xmm0, (%esp) ; X86-SSE-NEXT: flds (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: popl %eax ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2167,6 +2225,7 @@ define float @siffi(i32 %x) #0 { ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movl %eax, 
(%esp) ; X87-NEXT: fildl (%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -2178,6 +2237,7 @@ define float @siffi(i32 %x) #0 { ; X86-SSE-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 ; X86-SSE-NEXT: movss %xmm0, (%esp) ; X86-SSE-NEXT: flds (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: popl %eax ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2202,6 +2262,7 @@ define double @sifdl(i64 %x) #0 { ; X87-LABEL: sifdl: ; X87: # %bb.0: # %entry ; X87-NEXT: fildll {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl ; ; X86-SSE-LABEL: sifdl: @@ -2211,6 +2272,7 @@ define double @sifdl(i64 %x) #0 { ; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fstpl (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2235,6 +2297,7 @@ define float @siffl(i64 %x) #0 { ; X87-LABEL: siffl: ; X87: # %bb.0: # %entry ; X87-NEXT: fildll {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl ; ; X86-SSE-LABEL: siffl: @@ -2244,6 +2307,7 @@ define float @siffl(i64 %x) #0 { ; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fstps (%esp) ; X86-SSE-NEXT: flds (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: popl %eax ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2275,6 +2339,7 @@ define double @uifdb(i8 %x) #0 { ; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -2287,6 +2352,7 @@ define double @uifdb(i8 %x) #0 { ; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0 ; X86-SSE-NEXT: movsd %xmm0, (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2317,6 +2383,7 @@ define double @uifdw(i16 %x) #0 { ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movl %eax, (%esp) ; X87-NEXT: fildl 
(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -2329,6 +2396,7 @@ define double @uifdw(i16 %x) #0 { ; X86-SSE-NEXT: cvtsi2sd %eax, %xmm0 ; X86-SSE-NEXT: movsd %xmm0, (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2360,6 +2428,7 @@ define double @uifdi(i32 %x) #0 { ; X87-NEXT: movl %eax, (%esp) ; X87-NEXT: movl $0, {{[0-9]+}}(%esp) ; X87-NEXT: fildll (%esp) +; X87-NEXT: wait ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -2374,6 +2443,7 @@ define double @uifdi(i32 %x) #0 { ; X86-SSE-NEXT: subsd %xmm0, %xmm1 ; X86-SSE-NEXT: movsd %xmm1, (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2415,6 +2485,7 @@ define double @uifdl(i64 %x) #0 { ; X87-NEXT: fadds {{\.LCPI.*}}(,%ecx,4) ; X87-NEXT: fstpl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: addl $20, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -2431,6 +2502,7 @@ define double @uifdl(i64 %x) #0 { ; X86-SSE-NEXT: addpd %xmm0, %xmm1 ; X86-SSE-NEXT: movlpd %xmm1, (%esp) ; X86-SSE-NEXT: fldl (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $12, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2473,6 +2545,7 @@ define float @uiffb(i8 %x) #0 { ; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -2485,6 +2558,7 @@ define float @uiffb(i8 %x) #0 { ; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0 ; X86-SSE-NEXT: movss %xmm0, (%esp) ; X86-SSE-NEXT: flds (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: popl %eax ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2515,6 +2589,7 @@ define float @uiffw(i16 %x) #0 { ; X87-NEXT: 
movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movl %eax, (%esp) ; X87-NEXT: fildl (%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -2527,6 +2602,7 @@ define float @uiffw(i16 %x) #0 { ; X86-SSE-NEXT: cvtsi2ss %eax, %xmm0 ; X86-SSE-NEXT: movss %xmm0, (%esp) ; X86-SSE-NEXT: flds (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: popl %eax ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2558,6 +2634,7 @@ define float @uiffi(i32 %x) #0 { ; X87-NEXT: movl %eax, (%esp) ; X87-NEXT: movl $0, {{[0-9]+}}(%esp) ; X87-NEXT: fildll (%esp) +; X87-NEXT: wait ; X87-NEXT: addl $12, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -2574,6 +2651,7 @@ define float @uiffi(i32 %x) #0 { ; X86-SSE-NEXT: cvtsd2ss %xmm1, %xmm0 ; X86-SSE-NEXT: movss %xmm0, (%esp) ; X86-SSE-NEXT: flds (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: popl %eax ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl @@ -2615,6 +2693,7 @@ define float @uiffl(i64 %x) #0 { ; X87-NEXT: fadds {{\.LCPI.*}}(,%ecx,4) ; X87-NEXT: fstps {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: addl $20, %esp ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -2630,9 +2709,11 @@ define float @uiffl(i64 %x) #0 { ; X86-SSE-NEXT: fildll {{[0-9]+}}(%esp) ; X86-SSE-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; X86-SSE-NEXT: fstps {{[0-9]+}}(%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X86-SSE-NEXT: movss %xmm0, (%esp) ; X86-SSE-NEXT: flds (%esp) +; X86-SSE-NEXT: wait ; X86-SSE-NEXT: addl $20, %esp ; X86-SSE-NEXT: .cfi_def_cfa_offset 4 ; X86-SSE-NEXT: retl diff --git a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll index 45d7d513e6fb..1bc308bef8cc 100644 --- a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll +++ b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll @@ -7,9 +7,11 @@ define float @ceil(float %x) #0 { ; CHECK-NEXT: subl $12, %esp ; 
CHECK-NEXT: flds {{[0-9]+}}(%esp) ; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: calll _ceil ; CHECK-NEXT: fstps {{[0-9]+}}(%esp) ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl %result = call float @llvm.experimental.constrained.ceil.f32(float %x, metadata !"fpexcept.strict") #0 @@ -22,9 +24,11 @@ define float @cos(float %x) #0 { ; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: flds {{[0-9]+}}(%esp) ; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: calll _cos ; CHECK-NEXT: fstps {{[0-9]+}}(%esp) ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl %result = call float @llvm.experimental.constrained.cos.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -37,9 +41,11 @@ define float @exp(float %x) #0 { ; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: flds {{[0-9]+}}(%esp) ; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: calll _exp ; CHECK-NEXT: fstps {{[0-9]+}}(%esp) ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl %result = call float @llvm.experimental.constrained.exp.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -52,9 +58,11 @@ define float @floor(float %x) #0 { ; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: flds {{[0-9]+}}(%esp) ; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: calll _floor ; CHECK-NEXT: fstps {{[0-9]+}}(%esp) ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl %result = call float @llvm.experimental.constrained.floor.f32(float %x, metadata !"fpexcept.strict") #0 @@ -70,9 +78,11 @@ define float @frem(float %x, float %y) #0 { ; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fstpl {{[0-9]+}}(%esp) ; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: calll _fmod ; CHECK-NEXT: fstps {{[0-9]+}}(%esp) ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait 
; CHECK-NEXT: addl $20, %esp ; CHECK-NEXT: retl %result = call float @llvm.experimental.constrained.frem.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -85,9 +95,11 @@ define float @log(float %x) #0 { ; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: flds {{[0-9]+}}(%esp) ; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: calll _log ; CHECK-NEXT: fstps {{[0-9]+}}(%esp) ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl %result = call float @llvm.experimental.constrained.log.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -100,9 +112,11 @@ define float @log10(float %x) #0 { ; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: flds {{[0-9]+}}(%esp) ; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: calll _log10 ; CHECK-NEXT: fstps {{[0-9]+}}(%esp) ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl %result = call float @llvm.experimental.constrained.log10.f32(float %x, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -118,9 +132,11 @@ define float @pow(float %x, float %y) #0 { ; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fstpl {{[0-9]+}}(%esp) ; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: calll _pow ; CHECK-NEXT: fstps {{[0-9]+}}(%esp) ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: addl $20, %esp ; CHECK-NEXT: retl %result = call float @llvm.experimental.constrained.pow.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 @@ -133,9 +149,11 @@ define float @sin(float %x) #0 { ; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: flds {{[0-9]+}}(%esp) ; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: calll _sin ; CHECK-NEXT: fstps {{[0-9]+}}(%esp) ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: retl %result = call float @llvm.experimental.constrained.sin.f32(float %x, 
metadata !"round.dynamic", metadata !"fpexcept.strict") #0 diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll index f27076028198..115e16583bf4 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-cmp.ll @@ -52,6 +52,7 @@ define i32 @test_f32_oeq_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -71,6 +72,7 @@ define i32 @test_f32_oeq_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnel %eax, %ecx @@ -124,6 +126,7 @@ define i32 @test_f32_ogt_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -143,6 +146,7 @@ define i32 @test_f32_ogt_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmoval %eax, %ecx @@ -195,6 +199,7 @@ define i32 @test_f32_oge_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -214,6 +219,7 @@ define i32 @test_f32_oge_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: 
fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovael %eax, %ecx @@ -266,6 +272,7 @@ define i32 @test_f32_olt_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -285,6 +292,7 @@ define i32 @test_f32_olt_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmoval %eax, %ecx @@ -337,6 +345,7 @@ define i32 @test_f32_ole_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -356,6 +365,7 @@ define i32 @test_f32_ole_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovael %eax, %ecx @@ -408,6 +418,7 @@ define i32 @test_f32_one_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -427,6 +438,7 @@ define i32 @test_f32_one_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal 
{{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnel %eax, %ecx @@ -479,6 +491,7 @@ define i32 @test_f32_ord_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -498,6 +511,7 @@ define i32 @test_f32_ord_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnpl %eax, %ecx @@ -550,6 +564,7 @@ define i32 @test_f32_ueq_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -569,6 +584,7 @@ define i32 @test_f32_ueq_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovel %eax, %ecx @@ -621,6 +637,7 @@ define i32 @test_f32_ugt_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -640,6 +657,7 @@ define i32 @test_f32_ugt_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbl %eax, %ecx @@ 
-692,6 +710,7 @@ define i32 @test_f32_uge_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -711,6 +730,7 @@ define i32 @test_f32_uge_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbel %eax, %ecx @@ -763,6 +783,7 @@ define i32 @test_f32_ult_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -782,6 +803,7 @@ define i32 @test_f32_ult_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbl %eax, %ecx @@ -834,6 +856,7 @@ define i32 @test_f32_ule_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -853,6 +876,7 @@ define i32 @test_f32_ule_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbel %eax, %ecx @@ -909,6 +933,7 @@ define i32 @test_f32_une_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds 
{{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -928,6 +953,7 @@ define i32 @test_f32_une_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnel %eax, %ecx @@ -981,6 +1007,7 @@ define i32 @test_f32_uno_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -1000,6 +1027,7 @@ define i32 @test_f32_uno_q(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovpl %eax, %ecx @@ -1056,6 +1084,7 @@ define i32 @test_f64_oeq_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -1075,6 +1104,7 @@ define i32 @test_f64_oeq_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnel %eax, %ecx @@ -1128,6 +1158,7 @@ define i32 @test_f64_ogt_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; 
X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -1147,6 +1178,7 @@ define i32 @test_f64_ogt_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmoval %eax, %ecx @@ -1199,6 +1231,7 @@ define i32 @test_f64_oge_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -1218,6 +1251,7 @@ define i32 @test_f64_oge_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovael %eax, %ecx @@ -1270,6 +1304,7 @@ define i32 @test_f64_olt_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -1289,6 +1324,7 @@ define i32 @test_f64_olt_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmoval %eax, %ecx @@ -1341,6 +1377,7 @@ define i32 @test_f64_ole_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf 
@@ -1360,6 +1397,7 @@ define i32 @test_f64_ole_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovael %eax, %ecx @@ -1412,6 +1450,7 @@ define i32 @test_f64_one_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -1431,6 +1470,7 @@ define i32 @test_f64_one_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnel %eax, %ecx @@ -1483,6 +1523,7 @@ define i32 @test_f64_ord_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -1502,6 +1543,7 @@ define i32 @test_f64_ord_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnpl %eax, %ecx @@ -1554,6 +1596,7 @@ define i32 @test_f64_ueq_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -1573,6 +1616,7 @@ define i32 @test_f64_ueq_q(i32 %a, i32 %b, double %f1, double 
%f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovel %eax, %ecx @@ -1625,6 +1669,7 @@ define i32 @test_f64_ugt_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -1644,6 +1689,7 @@ define i32 @test_f64_ugt_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbl %eax, %ecx @@ -1696,6 +1742,7 @@ define i32 @test_f64_uge_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -1715,6 +1762,7 @@ define i32 @test_f64_uge_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbel %eax, %ecx @@ -1767,6 +1815,7 @@ define i32 @test_f64_ult_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -1786,6 +1835,7 @@ define i32 @test_f64_ult_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; 
X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbl %eax, %ecx @@ -1838,6 +1888,7 @@ define i32 @test_f64_ule_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -1857,6 +1908,7 @@ define i32 @test_f64_ule_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbel %eax, %ecx @@ -1913,6 +1965,7 @@ define i32 @test_f64_une_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -1932,6 +1985,7 @@ define i32 @test_f64_une_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnel %eax, %ecx @@ -1985,6 +2039,7 @@ define i32 @test_f64_uno_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fucompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2004,6 +2059,7 @@ define i32 @test_f64_uno_q(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fucompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal 
{{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovpl %eax, %ecx @@ -2060,6 +2116,7 @@ define i32 @test_f32_oeq_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2079,6 +2136,7 @@ define i32 @test_f32_oeq_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnel %eax, %ecx @@ -2132,6 +2190,7 @@ define i32 @test_f32_ogt_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2151,6 +2210,7 @@ define i32 @test_f32_ogt_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmoval %eax, %ecx @@ -2203,6 +2263,7 @@ define i32 @test_f32_oge_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2222,6 +2283,7 @@ define i32 @test_f32_oge_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovael %eax, %ecx 
@@ -2274,6 +2336,7 @@ define i32 @test_f32_olt_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2293,6 +2356,7 @@ define i32 @test_f32_olt_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmoval %eax, %ecx @@ -2345,6 +2409,7 @@ define i32 @test_f32_ole_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2364,6 +2429,7 @@ define i32 @test_f32_ole_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovael %eax, %ecx @@ -2416,6 +2482,7 @@ define i32 @test_f32_one_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2435,6 +2502,7 @@ define i32 @test_f32_one_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnel %eax, %ecx @@ -2487,6 +2555,7 @@ define i32 @test_f32_ord_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: 
flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2506,6 +2575,7 @@ define i32 @test_f32_ord_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnpl %eax, %ecx @@ -2558,6 +2628,7 @@ define i32 @test_f32_ueq_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2577,6 +2648,7 @@ define i32 @test_f32_ueq_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovel %eax, %ecx @@ -2629,6 +2701,7 @@ define i32 @test_f32_ugt_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2648,6 +2721,7 @@ define i32 @test_f32_ugt_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbl %eax, %ecx @@ -2700,6 +2774,7 @@ define i32 @test_f32_uge_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: 
fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2719,6 +2794,7 @@ define i32 @test_f32_uge_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbel %eax, %ecx @@ -2771,6 +2847,7 @@ define i32 @test_f32_ult_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2790,6 +2867,7 @@ define i32 @test_f32_ult_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbl %eax, %ecx @@ -2842,6 +2920,7 @@ define i32 @test_f32_ule_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2861,6 +2940,7 @@ define i32 @test_f32_ule_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbel %eax, %ecx @@ -2917,6 +2997,7 @@ define i32 @test_f32_une_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -2936,6 +3017,7 @@ 
define i32 @test_f32_une_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnel %eax, %ecx @@ -2989,6 +3071,7 @@ define i32 @test_f32_uno_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3008,6 +3091,7 @@ define i32 @test_f32_uno_s(i32 %a, i32 %b, float %f1, float %f2) #0 { ; X87-CMOV-NEXT: flds {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovpl %eax, %ecx @@ -3064,6 +3148,7 @@ define i32 @test_f64_oeq_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3083,6 +3168,7 @@ define i32 @test_f64_oeq_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnel %eax, %ecx @@ -3136,6 +3222,7 @@ define i32 @test_f64_ogt_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3155,6 +3242,7 @@ define i32 @test_f64_ogt_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl 
{{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmoval %eax, %ecx @@ -3207,6 +3295,7 @@ define i32 @test_f64_oge_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3226,6 +3315,7 @@ define i32 @test_f64_oge_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovael %eax, %ecx @@ -3278,6 +3368,7 @@ define i32 @test_f64_olt_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3297,6 +3388,7 @@ define i32 @test_f64_olt_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmoval %eax, %ecx @@ -3349,6 +3441,7 @@ define i32 @test_f64_ole_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3368,6 +3461,7 @@ define i32 @test_f64_ole_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; 
X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovael %eax, %ecx @@ -3420,6 +3514,7 @@ define i32 @test_f64_one_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3439,6 +3534,7 @@ define i32 @test_f64_one_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnel %eax, %ecx @@ -3491,6 +3587,7 @@ define i32 @test_f64_ord_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3510,6 +3607,7 @@ define i32 @test_f64_ord_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnpl %eax, %ecx @@ -3562,6 +3660,7 @@ define i32 @test_f64_ueq_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3581,6 +3680,7 @@ define i32 @test_f64_ueq_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal 
{{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovel %eax, %ecx @@ -3633,6 +3733,7 @@ define i32 @test_f64_ugt_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3652,6 +3753,7 @@ define i32 @test_f64_ugt_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbl %eax, %ecx @@ -3704,6 +3806,7 @@ define i32 @test_f64_uge_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3723,6 +3826,7 @@ define i32 @test_f64_uge_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbel %eax, %ecx @@ -3775,6 +3879,7 @@ define i32 @test_f64_ult_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3794,6 +3899,7 @@ define i32 @test_f64_ult_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbl %eax, %ecx @@ -3846,6 +3952,7 @@ define i32 
@test_f64_ule_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3865,6 +3972,7 @@ define i32 @test_f64_ule_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovbel %eax, %ecx @@ -3921,6 +4029,7 @@ define i32 @test_f64_une_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -3940,6 +4049,7 @@ define i32 @test_f64_une_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovnel %eax, %ecx @@ -3993,6 +4103,7 @@ define i32 @test_f64_uno_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fcompp +; X87-NEXT: wait ; X87-NEXT: fnstsw %ax ; X87-NEXT: # kill: def $ah killed $ah killed $ax ; X87-NEXT: sahf @@ -4012,6 +4123,7 @@ define i32 @test_f64_uno_s(i32 %a, i32 %b, double %f1, double %f2) #0 { ; X87-CMOV-NEXT: fldl {{[0-9]+}}(%esp) ; X87-CMOV-NEXT: fcompi %st(1), %st ; X87-CMOV-NEXT: fstp %st(0) +; X87-CMOV-NEXT: wait ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %eax ; X87-CMOV-NEXT: leal {{[0-9]+}}(%esp), %ecx ; X87-CMOV-NEXT: cmovpl %eax, %ecx diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll index 
c50d092aba10..c3576addfcdd 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-fptoint.ll @@ -59,6 +59,7 @@ define i1 @fptosi_f32toi1(float %x) #0 { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -105,6 +106,7 @@ define i8 @fptosi_f32toi8(float %x) #0 { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -151,6 +153,7 @@ define i16 @fptosi_f32toi16(float %x) #0 { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -193,6 +196,7 @@ define i32 @fptosi_f32toi32(float %x) #0 { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw (%esp) ; CHECK-NEXT: movzwl (%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -222,6 +226,7 @@ define i64 @fptosi_f32toi64(float %x) #0 { ; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; SSE-X86-NEXT: flds {{[0-9]+}}(%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; SSE-X86-NEXT: orl $3072, %eax # imm = 0xC00 @@ -254,6 +259,7 @@ define i64 @fptosi_f32toi64(float %x) #0 { ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) ; AVX-X86-NEXT: fisttpll (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl (%esp), %eax ; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; AVX-X86-NEXT: 
movl %ebp, %esp @@ -276,6 +282,7 @@ define i64 @fptosi_f32toi64(float %x) #0 { ; CHECK-NEXT: andl $-8, %esp ; CHECK-NEXT: subl $16, %esp ; CHECK-NEXT: flds 8(%ebp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -324,6 +331,7 @@ define i1 @fptoui_f32toi1(float %x) #0 { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -370,6 +378,7 @@ define i8 @fptoui_f32toi8(float %x) #0 { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -416,6 +425,7 @@ define i16 @fptoui_f32toi16(float %x) #0 { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw (%esp) ; CHECK-NEXT: movzwl (%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -471,6 +481,7 @@ define i32 @fptoui_f32toi32(float %x) #0 { ; AVX1-X86-NEXT: vmovss %xmm0, (%esp) ; AVX1-X86-NEXT: flds (%esp) ; AVX1-X86-NEXT: fisttpll (%esp) +; AVX1-X86-NEXT: wait ; AVX1-X86-NEXT: movl (%esp), %eax ; AVX1-X86-NEXT: movl %ebp, %esp ; AVX1-X86-NEXT: popl %ebp @@ -503,6 +514,7 @@ define i32 @fptoui_f32toi32(float %x) #0 { ; CHECK-NEXT: andl $-8, %esp ; CHECK-NEXT: subl $16, %esp ; CHECK-NEXT: flds 8(%ebp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -542,6 +554,7 @@ define i64 @fptoui_f32toi64(float %x) #0 { ; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; SSE-X86-NEXT: setbe %al ; SSE-X86-NEXT: flds {{[0-9]+}}(%esp) +; SSE-X86-NEXT: wait ; 
SSE-X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; SSE-X86-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -596,6 +609,7 @@ define i64 @fptoui_f32toi64(float %x) #0 { ; AVX1-X86-NEXT: vmovss %xmm0, (%esp) ; AVX1-X86-NEXT: flds (%esp) ; AVX1-X86-NEXT: fisttpll (%esp) +; AVX1-X86-NEXT: wait ; AVX1-X86-NEXT: setbe %al ; AVX1-X86-NEXT: movzbl %al, %edx ; AVX1-X86-NEXT: shll $31, %edx @@ -644,6 +658,7 @@ define i64 @fptoui_f32toi64(float %x) #0 { ; AVX512-X86-NEXT: vmovss %xmm0, (%esp) ; AVX512-X86-NEXT: flds (%esp) ; AVX512-X86-NEXT: fisttpll (%esp) +; AVX512-X86-NEXT: wait ; AVX512-X86-NEXT: setbe %dl ; AVX512-X86-NEXT: shll $31, %edx ; AVX512-X86-NEXT: xorl {{[0-9]+}}(%esp), %edx @@ -670,6 +685,7 @@ define i64 @fptoui_f32toi64(float %x) #0 { ; CHECK-NEXT: flds 8(%ebp) ; CHECK-NEXT: flds {{\.LCPI.*}} ; CHECK-NEXT: fcom %st(1) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax @@ -684,6 +700,7 @@ define i64 @fptoui_f32toi64(float %x) #0 { ; CHECK-NEXT: .LBB9_2: ; CHECK-NEXT: fstp %st(1) ; CHECK-NEXT: fsubrp %st, %st(1) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -734,6 +751,7 @@ define i8 @fptosi_f64toi8(double %x) #0 { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -780,6 +798,7 @@ define i16 @fptosi_f64toi16(double %x) #0 { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -822,6 +841,7 @@ define i32 @fptosi_f64toi32(double %x) #0 { ; CHECK-NEXT: subl $8, %esp ; 
CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw (%esp) ; CHECK-NEXT: movzwl (%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -851,6 +871,7 @@ define i64 @fptosi_f64toi64(double %x) #0 { ; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-X86-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) ; SSE-X86-NEXT: fldl {{[0-9]+}}(%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; SSE-X86-NEXT: orl $3072, %eax # imm = 0xC00 @@ -883,6 +904,7 @@ define i64 @fptosi_f64toi64(double %x) #0 { ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) ; AVX-X86-NEXT: fisttpll (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl (%esp), %eax ; AVX-X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; AVX-X86-NEXT: movl %ebp, %esp @@ -905,6 +927,7 @@ define i64 @fptosi_f64toi64(double %x) #0 { ; CHECK-NEXT: andl $-8, %esp ; CHECK-NEXT: subl $16, %esp ; CHECK-NEXT: fldl 8(%ebp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -953,6 +976,7 @@ define i1 @fptoui_f64toi1(double %x) #0 { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -999,6 +1023,7 @@ define i8 @fptoui_f64toi8(double %x) #0 { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1045,6 +1070,7 @@ define i16 @fptoui_f64toi16(double %x) #0 { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: fldl {{[0-9]+}}(%esp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw (%esp) ; CHECK-NEXT: movzwl (%esp), 
%eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1100,6 +1126,7 @@ define i32 @fptoui_f64toi32(double %x) #0 { ; AVX1-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX1-X86-NEXT: fldl (%esp) ; AVX1-X86-NEXT: fisttpll (%esp) +; AVX1-X86-NEXT: wait ; AVX1-X86-NEXT: movl (%esp), %eax ; AVX1-X86-NEXT: movl %ebp, %esp ; AVX1-X86-NEXT: popl %ebp @@ -1132,6 +1159,7 @@ define i32 @fptoui_f64toi32(double %x) #0 { ; CHECK-NEXT: andl $-8, %esp ; CHECK-NEXT: subl $16, %esp ; CHECK-NEXT: fldl 8(%ebp) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1171,6 +1199,7 @@ define i64 @fptoui_f64toi64(double %x) #0 { ; SSE-X86-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) ; SSE-X86-NEXT: setbe %al ; SSE-X86-NEXT: fldl {{[0-9]+}}(%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE-X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; SSE-X86-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -1225,6 +1254,7 @@ define i64 @fptoui_f64toi64(double %x) #0 { ; AVX1-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX1-X86-NEXT: fldl (%esp) ; AVX1-X86-NEXT: fisttpll (%esp) +; AVX1-X86-NEXT: wait ; AVX1-X86-NEXT: setbe %al ; AVX1-X86-NEXT: movzbl %al, %edx ; AVX1-X86-NEXT: shll $31, %edx @@ -1273,6 +1303,7 @@ define i64 @fptoui_f64toi64(double %x) #0 { ; AVX512-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX512-X86-NEXT: fldl (%esp) ; AVX512-X86-NEXT: fisttpll (%esp) +; AVX512-X86-NEXT: wait ; AVX512-X86-NEXT: setbe %dl ; AVX512-X86-NEXT: shll $31, %edx ; AVX512-X86-NEXT: xorl {{[0-9]+}}(%esp), %edx @@ -1299,6 +1330,7 @@ define i64 @fptoui_f64toi64(double %x) #0 { ; CHECK-NEXT: fldl 8(%ebp) ; CHECK-NEXT: flds {{\.LCPI.*}} ; CHECK-NEXT: fcom %st(1) +; CHECK-NEXT: wait ; CHECK-NEXT: fnstsw %ax ; CHECK-NEXT: xorl %edx, %edx ; CHECK-NEXT: # kill: def $ah killed $ah killed $ax @@ -1313,6 +1345,7 @@ define i64 @fptoui_f64toi64(double %x) #0 { ; CHECK-NEXT: .LBB18_2: ; CHECK-NEXT: fstp %st(1) ; CHECK-NEXT: fsubrp %st, %st(1) +; CHECK-NEXT: wait ; 
CHECK-NEXT: fnstcw {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: orl $3072, %ecx # imm = 0xC00 diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll index 887185af8121..0a50f3df2ac2 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-inttofp.ll @@ -41,6 +41,7 @@ define float @sitofp_i1tof32(i1 %x) #0 { ; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0 ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl @@ -64,6 +65,7 @@ define float @sitofp_i1tof32(i1 %x) #0 { ; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl @@ -86,6 +88,7 @@ define float @sitofp_i1tof32(i1 %x) #0 { ; X87-NEXT: movsbl %al, %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -104,6 +107,7 @@ define float @sitofp_i8tof32(i8 %x) #0 { ; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0 ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl @@ -122,6 +126,7 @@ define float @sitofp_i8tof32(i8 %x) #0 { ; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl @@ -139,6 +144,7 @@ define float @sitofp_i8tof32(i8 %x) #0 { ; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: 
retl @@ -157,6 +163,7 @@ define float @sitofp_i16tof32(i16 %x) #0 { ; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0 ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl @@ -175,6 +182,7 @@ define float @sitofp_i16tof32(i16 %x) #0 { ; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl @@ -192,6 +200,7 @@ define float @sitofp_i16tof32(i16 %x) #0 { ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -209,6 +218,7 @@ define float @sitofp_i32tof32(i32 %x) #0 { ; SSE-X86-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0 ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl @@ -225,6 +235,7 @@ define float @sitofp_i32tof32(i32 %x) #0 { ; AVX-X86-NEXT: vcvtsi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl @@ -241,6 +252,7 @@ define float @sitofp_i32tof32(i32 %x) #0 { ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movl %eax, (%esp) ; X87-NEXT: fildl (%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -258,6 +270,7 @@ define float @sitofp_i64tof32(i64 %x) #0 { ; SSE-X86-NEXT: fildll {{[0-9]+}}(%esp) ; SSE-X86-NEXT: fstps (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl @@ -274,6 +287,7 @@ define float @sitofp_i64tof32(i64 %x) #0 { ; AVX-X86-NEXT: 
fildll {{[0-9]+}}(%esp) ; AVX-X86-NEXT: fstps (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl @@ -286,6 +300,7 @@ define float @sitofp_i64tof32(i64 %x) #0 { ; X87-LABEL: sitofp_i64tof32: ; X87: # %bb.0: ; X87-NEXT: fildll {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl %result = call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %x, metadata !"round.dynamic", @@ -304,6 +319,7 @@ define float @uitofp_i1tof32(i1 %x) #0 { ; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0 ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl @@ -324,6 +340,7 @@ define float @uitofp_i1tof32(i1 %x) #0 { ; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl @@ -343,6 +360,7 @@ define float @uitofp_i1tof32(i1 %x) #0 { ; X87-NEXT: movzbl %al, %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -361,6 +379,7 @@ define float @uitofp_i8tof32(i8 %x) #0 { ; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0 ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl @@ -379,6 +398,7 @@ define float @uitofp_i8tof32(i8 %x) #0 { ; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl @@ -396,6 +416,7 @@ define float @uitofp_i8tof32(i8 %x) #0 { ; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; 
X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -414,6 +435,7 @@ define float @uitofp_i16tof32(i16 %x) #0 { ; SSE-X86-NEXT: cvtsi2ss %eax, %xmm0 ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl @@ -432,6 +454,7 @@ define float @uitofp_i16tof32(i16 %x) #0 { ; AVX-X86-NEXT: vcvtsi2ss %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl @@ -449,6 +472,7 @@ define float @uitofp_i16tof32(i16 %x) #0 { ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movl %eax, (%esp) ; X87-NEXT: fildl (%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -471,6 +495,7 @@ define float @uitofp_i32tof32(i32 %x) #0 { ; SSE-X86-NEXT: cvtsd2ss %xmm1, %xmm0 ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE-X86-NEXT: retl @@ -492,6 +517,7 @@ define float @uitofp_i32tof32(i32 %x) #0 { ; AVX1-X86-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 ; AVX1-X86-NEXT: vmovss %xmm0, (%esp) ; AVX1-X86-NEXT: flds (%esp) +; AVX1-X86-NEXT: wait ; AVX1-X86-NEXT: popl %eax ; AVX1-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX1-X86-NEXT: retl @@ -509,6 +535,7 @@ define float @uitofp_i32tof32(i32 %x) #0 { ; AVX512-X86-NEXT: vcvtusi2ssl {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX512-X86-NEXT: vmovss %xmm0, (%esp) ; AVX512-X86-NEXT: flds (%esp) +; AVX512-X86-NEXT: wait ; AVX512-X86-NEXT: popl %eax ; AVX512-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX512-X86-NEXT: retl @@ -531,6 +558,7 @@ define float @uitofp_i32tof32(i32 %x) #0 { ; X87-NEXT: movl %eax, (%esp) ; X87-NEXT: movl $0, {{[0-9]+}}(%esp) ; X87-NEXT: fildll (%esp) +; X87-NEXT: wait ; X87-NEXT: movl %ebp, %esp ; X87-NEXT: popl %ebp ; 
X87-NEXT: .cfi_def_cfa %esp, 4 @@ -558,9 +586,11 @@ define float @uitofp_i64tof32(i64 %x) #0 { ; SSE-X86-NEXT: fildll {{[0-9]+}}(%esp) ; SSE-X86-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -598,9 +628,11 @@ define float @uitofp_i64tof32(i64 %x) #0 { ; AVX-X86-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-X86-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; AVX-X86-NEXT: fstps {{[0-9]+}}(%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -645,6 +677,7 @@ define float @uitofp_i64tof32(i64 %x) #0 { ; X87-NEXT: fadds {{\.LCPI.*}}(,%ecx,4) ; X87-NEXT: fstps {{[0-9]+}}(%esp) ; X87-NEXT: flds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: movl %ebp, %esp ; X87-NEXT: popl %ebp ; X87-NEXT: .cfi_def_cfa %esp, 4 @@ -669,6 +702,7 @@ define double @sitofp_i8tof64(i8 %x) #0 { ; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -693,6 +727,7 @@ define double @sitofp_i8tof64(i8 %x) #0 { ; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -711,6 +746,7 @@ define double @sitofp_i8tof64(i8 %x) #0 { ; X87-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; 
X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -734,6 +770,7 @@ define double @sitofp_i16tof64(i16 %x) #0 { ; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -758,6 +795,7 @@ define double @sitofp_i16tof64(i16 %x) #0 { ; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -776,6 +814,7 @@ define double @sitofp_i16tof64(i16 %x) #0 { ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -798,6 +837,7 @@ define double @sitofp_i32tof64(i32 %x) #0 { ; SSE-X86-NEXT: cvtsi2sdl 8(%ebp), %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -820,6 +860,7 @@ define double @sitofp_i32tof64(i32 %x) #0 { ; AVX-X86-NEXT: vcvtsi2sdl 8(%ebp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -837,6 +878,7 @@ define double @sitofp_i32tof64(i32 %x) #0 { ; X87-NEXT: movl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movl %eax, (%esp) ; X87-NEXT: fildl (%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -859,6 +901,7 @@ define double @sitofp_i64tof64(i64 %x) #0 { ; SSE-X86-NEXT: fildll 8(%ebp) ; SSE-X86-NEXT: fstpl (%esp) ; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 @@ 
-881,6 +924,7 @@ define double @sitofp_i64tof64(i64 %x) #0 { ; AVX-X86-NEXT: fildll 8(%ebp) ; AVX-X86-NEXT: fstpl (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -894,6 +938,7 @@ define double @sitofp_i64tof64(i64 %x) #0 { ; X87-LABEL: sitofp_i64tof64: ; X87: # %bb.0: ; X87-NEXT: fildll {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl %result = call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %x, metadata !"round.dynamic", @@ -917,6 +962,7 @@ define double @uitofp_i1tof64(i1 %x) #0 { ; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -943,6 +989,7 @@ define double @uitofp_i1tof64(i1 %x) #0 { ; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -963,6 +1010,7 @@ define double @uitofp_i1tof64(i1 %x) #0 { ; X87-NEXT: movzbl %al, %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -986,6 +1034,7 @@ define double @uitofp_i8tof64(i8 %x) #0 { ; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -1010,6 +1059,7 @@ define double @uitofp_i8tof64(i8 %x) #0 { ; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -1028,6 +1078,7 @@ define double 
@uitofp_i8tof64(i8 %x) #0 { ; X87-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X87-NEXT: filds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -1051,6 +1102,7 @@ define double @uitofp_i16tof64(i16 %x) #0 { ; SSE-X86-NEXT: cvtsi2sd %eax, %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -1075,6 +1127,7 @@ define double @uitofp_i16tof64(i16 %x) #0 { ; AVX-X86-NEXT: vcvtsi2sd %eax, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -1093,6 +1146,7 @@ define double @uitofp_i16tof64(i16 %x) #0 { ; X87-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X87-NEXT: movl %eax, (%esp) ; X87-NEXT: fildl (%esp) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: .cfi_def_cfa_offset 4 ; X87-NEXT: retl @@ -1118,6 +1172,7 @@ define double @uitofp_i32tof64(i32 %x) #0 { ; SSE-X86-NEXT: subsd %xmm0, %xmm1 ; SSE-X86-NEXT: movsd %xmm1, (%esp) ; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -1144,6 +1199,7 @@ define double @uitofp_i32tof64(i32 %x) #0 { ; AVX1-X86-NEXT: vsubsd %xmm0, %xmm1, %xmm0 ; AVX1-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX1-X86-NEXT: fldl (%esp) +; AVX1-X86-NEXT: wait ; AVX1-X86-NEXT: movl %ebp, %esp ; AVX1-X86-NEXT: popl %ebp ; AVX1-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -1167,6 +1223,7 @@ define double @uitofp_i32tof64(i32 %x) #0 { ; AVX512-X86-NEXT: vcvtusi2sdl 8(%ebp), %xmm0, %xmm0 ; AVX512-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX512-X86-NEXT: fldl (%esp) +; AVX512-X86-NEXT: wait ; AVX512-X86-NEXT: movl %ebp, %esp ; AVX512-X86-NEXT: popl %ebp ; AVX512-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -1190,6 +1247,7 @@ define 
double @uitofp_i32tof64(i32 %x) #0 { ; X87-NEXT: movl %eax, (%esp) ; X87-NEXT: movl $0, {{[0-9]+}}(%esp) ; X87-NEXT: fildll (%esp) +; X87-NEXT: wait ; X87-NEXT: movl %ebp, %esp ; X87-NEXT: popl %ebp ; X87-NEXT: .cfi_def_cfa %esp, 4 @@ -1218,6 +1276,7 @@ define double @uitofp_i64tof64(i64 %x) #0 { ; SSE-X86-NEXT: addpd %xmm0, %xmm1 ; SSE-X86-NEXT: movlpd %xmm1, (%esp) ; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -1249,6 +1308,7 @@ define double @uitofp_i64tof64(i64 %x) #0 { ; AVX-X86-NEXT: vaddpd %xmm0, %xmm1, %xmm0 ; AVX-X86-NEXT: vmovlpd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -1286,6 +1346,7 @@ define double @uitofp_i64tof64(i64 %x) #0 { ; X87-NEXT: fadds {{\.LCPI.*}}(,%ecx,4) ; X87-NEXT: fstpl {{[0-9]+}}(%esp) ; X87-NEXT: fldl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: movl %ebp, %esp ; X87-NEXT: popl %ebp ; X87-NEXT: .cfi_def_cfa %esp, 4 diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll index 966be02124d8..26137bd76a9f 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll @@ -26,6 +26,7 @@ define float @fceil32(float %f) #0 { ; SSE41-X86-NEXT: roundss $10, %xmm0, %xmm0 ; SSE41-X86-NEXT: movss %xmm0, (%esp) ; SSE41-X86-NEXT: flds (%esp) +; SSE41-X86-NEXT: wait ; SSE41-X86-NEXT: popl %eax ; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE41-X86-NEXT: retl @@ -43,6 +44,7 @@ define float @fceil32(float %f) #0 { ; AVX-X86-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl @@ -70,6 +72,7 @@ define double @fceilf64(double %f) #0 { ; SSE41-X86-NEXT: roundsd $10, %xmm0, 
%xmm0 ; SSE41-X86-NEXT: movsd %xmm0, (%esp) ; SSE41-X86-NEXT: fldl (%esp) +; SSE41-X86-NEXT: wait ; SSE41-X86-NEXT: movl %ebp, %esp ; SSE41-X86-NEXT: popl %ebp ; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -93,6 +96,7 @@ define double @fceilf64(double %f) #0 { ; AVX-X86-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -116,6 +120,7 @@ define float @ffloor32(float %f) #0 { ; SSE41-X86-NEXT: roundss $9, %xmm0, %xmm0 ; SSE41-X86-NEXT: movss %xmm0, (%esp) ; SSE41-X86-NEXT: flds (%esp) +; SSE41-X86-NEXT: wait ; SSE41-X86-NEXT: popl %eax ; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE41-X86-NEXT: retl @@ -133,6 +138,7 @@ define float @ffloor32(float %f) #0 { ; AVX-X86-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl @@ -160,6 +166,7 @@ define double @ffloorf64(double %f) #0 { ; SSE41-X86-NEXT: roundsd $9, %xmm0, %xmm0 ; SSE41-X86-NEXT: movsd %xmm0, (%esp) ; SSE41-X86-NEXT: fldl (%esp) +; SSE41-X86-NEXT: wait ; SSE41-X86-NEXT: movl %ebp, %esp ; SSE41-X86-NEXT: popl %ebp ; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -183,6 +190,7 @@ define double @ffloorf64(double %f) #0 { ; AVX-X86-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -206,6 +214,7 @@ define float @ftrunc32(float %f) #0 { ; SSE41-X86-NEXT: roundss $11, %xmm0, %xmm0 ; SSE41-X86-NEXT: movss %xmm0, (%esp) ; SSE41-X86-NEXT: flds (%esp) +; SSE41-X86-NEXT: wait ; SSE41-X86-NEXT: popl %eax ; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE41-X86-NEXT: retl @@ -223,6 +232,7 @@ define float @ftrunc32(float %f) #0 { ; AVX-X86-NEXT: 
vroundss $11, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl @@ -250,6 +260,7 @@ define double @ftruncf64(double %f) #0 { ; SSE41-X86-NEXT: roundsd $11, %xmm0, %xmm0 ; SSE41-X86-NEXT: movsd %xmm0, (%esp) ; SSE41-X86-NEXT: fldl (%esp) +; SSE41-X86-NEXT: wait ; SSE41-X86-NEXT: movl %ebp, %esp ; SSE41-X86-NEXT: popl %ebp ; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -273,6 +284,7 @@ define double @ftruncf64(double %f) #0 { ; AVX-X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -296,6 +308,7 @@ define float @frint32(float %f) #0 { ; SSE41-X86-NEXT: roundss $4, %xmm0, %xmm0 ; SSE41-X86-NEXT: movss %xmm0, (%esp) ; SSE41-X86-NEXT: flds (%esp) +; SSE41-X86-NEXT: wait ; SSE41-X86-NEXT: popl %eax ; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE41-X86-NEXT: retl @@ -313,6 +326,7 @@ define float @frint32(float %f) #0 { ; AVX-X86-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl @@ -341,6 +355,7 @@ define double @frintf64(double %f) #0 { ; SSE41-X86-NEXT: roundsd $4, %xmm0, %xmm0 ; SSE41-X86-NEXT: movsd %xmm0, (%esp) ; SSE41-X86-NEXT: fldl (%esp) +; SSE41-X86-NEXT: wait ; SSE41-X86-NEXT: movl %ebp, %esp ; SSE41-X86-NEXT: popl %ebp ; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -364,6 +379,7 @@ define double @frintf64(double %f) #0 { ; AVX-X86-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -388,6 +404,7 @@ define float @fnearbyint32(float 
%f) #0 { ; SSE41-X86-NEXT: roundss $12, %xmm0, %xmm0 ; SSE41-X86-NEXT: movss %xmm0, (%esp) ; SSE41-X86-NEXT: flds (%esp) +; SSE41-X86-NEXT: wait ; SSE41-X86-NEXT: popl %eax ; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 ; SSE41-X86-NEXT: retl @@ -405,6 +422,7 @@ define float @fnearbyint32(float %f) #0 { ; AVX-X86-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: .cfi_def_cfa_offset 4 ; AVX-X86-NEXT: retl @@ -433,6 +451,7 @@ define double @fnearbyintf64(double %f) #0 { ; SSE41-X86-NEXT: roundsd $12, %xmm0, %xmm0 ; SSE41-X86-NEXT: movsd %xmm0, (%esp) ; SSE41-X86-NEXT: fldl (%esp) +; SSE41-X86-NEXT: wait ; SSE41-X86-NEXT: movl %ebp, %esp ; SSE41-X86-NEXT: popl %ebp ; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 @@ -456,6 +475,7 @@ define double @fnearbyintf64(double %f) #0 { ; AVX-X86-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar.ll b/llvm/test/CodeGen/X86/fp-strict-scalar.ll index c864acda3202..fbcde4aa4cd2 100644 --- a/llvm/test/CodeGen/X86/fp-strict-scalar.ll +++ b/llvm/test/CodeGen/X86/fp-strict-scalar.ll @@ -33,6 +33,7 @@ define double @fadd_f64(double %a, double %b) nounwind strictfp { ; SSE-X86-NEXT: addsd 16(%ebp), %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: retl @@ -52,6 +53,7 @@ define double @fadd_f64(double %a, double %b) nounwind strictfp { ; AVX-X86-NEXT: vaddsd 16(%ebp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: retl @@ -65,6 +67,7 @@ define double @fadd_f64(double %a, double 
%b) nounwind strictfp { ; X87: # %bb.0: ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: faddl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl %ret = call double @llvm.experimental.constrained.fadd.f64(double %a, double %b, metadata !"round.dynamic", @@ -80,6 +83,7 @@ define float @fadd_f32(float %a, float %b) nounwind strictfp { ; SSE-X86-NEXT: addss {{[0-9]+}}(%esp), %xmm0 ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: retl ; @@ -95,6 +99,7 @@ define float @fadd_f32(float %a, float %b) nounwind strictfp { ; AVX-X86-NEXT: vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: retl ; @@ -107,6 +112,7 @@ define float @fadd_f32(float %a, float %b) nounwind strictfp { ; X87: # %bb.0: ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fadds {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl %ret = call float @llvm.experimental.constrained.fadd.f32(float %a, float %b, metadata !"round.dynamic", @@ -125,6 +131,7 @@ define double @fsub_f64(double %a, double %b) nounwind strictfp { ; SSE-X86-NEXT: subsd 16(%ebp), %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: retl @@ -144,6 +151,7 @@ define double @fsub_f64(double %a, double %b) nounwind strictfp { ; AVX-X86-NEXT: vsubsd 16(%ebp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: retl @@ -157,6 +165,7 @@ define double @fsub_f64(double %a, double %b) nounwind strictfp { ; X87: # %bb.0: ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fsubl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl %ret = call double @llvm.experimental.constrained.fsub.f64(double %a, double %b, metadata !"round.dynamic", @@ 
-172,6 +181,7 @@ define float @fsub_f32(float %a, float %b) nounwind strictfp { ; SSE-X86-NEXT: subss {{[0-9]+}}(%esp), %xmm0 ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: retl ; @@ -187,6 +197,7 @@ define float @fsub_f32(float %a, float %b) nounwind strictfp { ; AVX-X86-NEXT: vsubss {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: retl ; @@ -199,6 +210,7 @@ define float @fsub_f32(float %a, float %b) nounwind strictfp { ; X87: # %bb.0: ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fsubs {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl %ret = call float @llvm.experimental.constrained.fsub.f32(float %a, float %b, metadata !"round.dynamic", @@ -217,6 +229,7 @@ define double @fmul_f64(double %a, double %b) nounwind strictfp { ; SSE-X86-NEXT: mulsd 16(%ebp), %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: retl @@ -236,6 +249,7 @@ define double @fmul_f64(double %a, double %b) nounwind strictfp { ; AVX-X86-NEXT: vmulsd 16(%ebp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: retl @@ -249,6 +263,7 @@ define double @fmul_f64(double %a, double %b) nounwind strictfp { ; X87: # %bb.0: ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fmull {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl %ret = call double @llvm.experimental.constrained.fmul.f64(double %a, double %b, metadata !"round.dynamic", @@ -264,6 +279,7 @@ define float @fmul_f32(float %a, float %b) nounwind strictfp { ; SSE-X86-NEXT: mulss {{[0-9]+}}(%esp), %xmm0 ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: 
retl ; @@ -279,6 +295,7 @@ define float @fmul_f32(float %a, float %b) nounwind strictfp { ; AVX-X86-NEXT: vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: retl ; @@ -291,6 +308,7 @@ define float @fmul_f32(float %a, float %b) nounwind strictfp { ; X87: # %bb.0: ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fmuls {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl %ret = call float @llvm.experimental.constrained.fmul.f32(float %a, float %b, metadata !"round.dynamic", @@ -309,6 +327,7 @@ define double @fdiv_f64(double %a, double %b) nounwind strictfp { ; SSE-X86-NEXT: divsd 16(%ebp), %xmm0 ; SSE-X86-NEXT: movsd %xmm0, (%esp) ; SSE-X86-NEXT: fldl (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movl %ebp, %esp ; SSE-X86-NEXT: popl %ebp ; SSE-X86-NEXT: retl @@ -328,6 +347,7 @@ define double @fdiv_f64(double %a, double %b) nounwind strictfp { ; AVX-X86-NEXT: vdivsd 16(%ebp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovsd %xmm0, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: retl @@ -341,6 +361,7 @@ define double @fdiv_f64(double %a, double %b) nounwind strictfp { ; X87: # %bb.0: ; X87-NEXT: fldl {{[0-9]+}}(%esp) ; X87-NEXT: fdivl {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl %ret = call double @llvm.experimental.constrained.fdiv.f64(double %a, double %b, metadata !"round.dynamic", @@ -356,6 +377,7 @@ define float @fdiv_f32(float %a, float %b) nounwind strictfp { ; SSE-X86-NEXT: divss {{[0-9]+}}(%esp), %xmm0 ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: flds (%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: popl %eax ; SSE-X86-NEXT: retl ; @@ -371,6 +393,7 @@ define float @fdiv_f32(float %a, float %b) nounwind strictfp { ; AVX-X86-NEXT: vdivss {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-X86-NEXT: vmovss %xmm0, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl 
%eax ; AVX-X86-NEXT: retl ; @@ -383,6 +406,7 @@ define float @fdiv_f32(float %a, float %b) nounwind strictfp { ; X87: # %bb.0: ; X87-NEXT: flds {{[0-9]+}}(%esp) ; X87-NEXT: fdivs {{[0-9]+}}(%esp) +; X87-NEXT: wait ; X87-NEXT: retl %ret = call float @llvm.experimental.constrained.fdiv.f32(float %a, float %b, metadata !"round.dynamic", @@ -429,6 +453,7 @@ define void @fpext_f32_to_f64(float* %val, double* %ret) nounwind strictfp { ; X87-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X87-NEXT: flds (%ecx) ; X87-NEXT: fstpl (%eax) +; X87-NEXT: wait ; X87-NEXT: retl %1 = load float, float* %val, align 4 %res = call double @llvm.experimental.constrained.fpext.f64.f32(float %1, @@ -479,6 +504,7 @@ define void @fptrunc_double_to_f32(double* %val, float *%ret) nounwind strictfp ; X87-NEXT: fstps (%esp) ; X87-NEXT: flds (%esp) ; X87-NEXT: fstps (%eax) +; X87-NEXT: wait ; X87-NEXT: popl %eax ; X87-NEXT: retl %1 = load double, double* %val, align 8 @@ -526,6 +552,7 @@ define void @fsqrt_f64(double* %a) nounwind strictfp { ; X87-NEXT: fldl (%eax) ; X87-NEXT: fsqrt ; X87-NEXT: fstpl (%eax) +; X87-NEXT: wait ; X87-NEXT: retl %1 = load double, double* %a, align 8 %res = call double @llvm.experimental.constrained.sqrt.f64(double %1, @@ -572,6 +599,7 @@ define void @fsqrt_f32(float* %a) nounwind strictfp { ; X87-NEXT: flds (%eax) ; X87-NEXT: fsqrt ; X87-NEXT: fstps (%eax) +; X87-NEXT: wait ; X87-NEXT: retl %1 = load float, float* %a, align 4 %res = call float @llvm.experimental.constrained.sqrt.f32(float %1, @@ -613,6 +641,7 @@ define double @fma_f64(double %a, double %b, double %c) nounwind strictfp { ; AVX-X86-NEXT: vfmadd213sd {{.*#+}} xmm1 = (xmm0 * xmm1) + mem ; AVX-X86-NEXT: vmovsd %xmm1, (%esp) ; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: movl %ebp, %esp ; AVX-X86-NEXT: popl %ebp ; AVX-X86-NEXT: retl @@ -631,6 +660,7 @@ define double @fma_f64(double %a, double %b, double %c) nounwind strictfp { ; X87-NEXT: fstpl {{[0-9]+}}(%esp) ; X87-NEXT: fstpl {{[0-9]+}}(%esp) ; 
X87-NEXT: fstpl (%esp) +; X87-NEXT: wait ; X87-NEXT: calll fma ; X87-NEXT: addl $24, %esp ; X87-NEXT: retl @@ -669,6 +699,7 @@ define float @fma_f32(float %a, float %b, float %c) nounwind strictfp { ; AVX-X86-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm0 * xmm1) + mem ; AVX-X86-NEXT: vmovss %xmm1, (%esp) ; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: wait ; AVX-X86-NEXT: popl %eax ; AVX-X86-NEXT: retl ; @@ -686,6 +717,7 @@ define float @fma_f32(float %a, float %b, float %c) nounwind strictfp { ; X87-NEXT: fstps {{[0-9]+}}(%esp) ; X87-NEXT: fstps {{[0-9]+}}(%esp) ; X87-NEXT: fstps (%esp) +; X87-NEXT: wait ; X87-NEXT: calll fmaf ; X87-NEXT: addl $12, %esp ; X87-NEXT: retl diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll index 048656f48b62..ce5b09f8d411 100644 --- a/llvm/test/CodeGen/X86/fp128-cast-strict.ll +++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll @@ -39,6 +39,7 @@ define void @TestFPExtF32_F128() nounwind strictfp { ; X86-NEXT: subl $24, %esp ; X86-NEXT: flds vf32 ; X86-NEXT: fstps {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __extendsftf2 @@ -86,6 +87,7 @@ define void @TestFPExtF64_F128() nounwind strictfp { ; X86-NEXT: subl $40, %esp ; X86-NEXT: fldl vf64 ; X86-NEXT: fstpl {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __extenddftf2 @@ -114,6 +116,7 @@ define void @TestFPExtF80_F128() nounwind strictfp { ; X64-SSE-NEXT: subq $24, %rsp ; X64-SSE-NEXT: fldt {{.*}}(%rip) ; X64-SSE-NEXT: fstpt (%rsp) +; X64-SSE-NEXT: wait ; X64-SSE-NEXT: callq __extendxftf2 ; X64-SSE-NEXT: movaps %xmm0, {{.*}}(%rip) ; X64-SSE-NEXT: addq $24, %rsp @@ -124,6 +127,7 @@ define void @TestFPExtF80_F128() nounwind strictfp { ; X64-AVX-NEXT: subq $24, %rsp ; X64-AVX-NEXT: fldt {{.*}}(%rip) ; X64-AVX-NEXT: fstpt (%rsp) +; X64-AVX-NEXT: wait ; X64-AVX-NEXT: callq __extendxftf2 ; X64-AVX-NEXT: 
vmovaps %xmm0, {{.*}}(%rip) ; X64-AVX-NEXT: addq $24, %rsp @@ -135,6 +139,7 @@ define void @TestFPExtF80_F128() nounwind strictfp { ; X86-NEXT: subl $40, %esp ; X86-NEXT: fldt vf80 ; X86-NEXT: fstpt {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: leal {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: calll __extendxftf2 @@ -186,6 +191,7 @@ define void @TestFPTruncF128_F32() nounwind strictfp { ; X86-NEXT: calll __trunctfsf2 ; X86-NEXT: addl $16, %esp ; X86-NEXT: fstps vf32 +; X86-NEXT: wait ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl entry: @@ -224,6 +230,7 @@ define void @TestFPTruncF128_F64() nounwind strictfp { ; X86-NEXT: calll __trunctfdf2 ; X86-NEXT: addl $16, %esp ; X86-NEXT: fstpl vf64 +; X86-NEXT: wait ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl entry: @@ -240,6 +247,7 @@ define void @TestFPTruncF128_F80() nounwind strictfp { ; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0 ; X64-SSE-NEXT: callq __trunctfxf2 ; X64-SSE-NEXT: fstpt {{.*}}(%rip) +; X64-SSE-NEXT: wait ; X64-SSE-NEXT: popq %rax ; X64-SSE-NEXT: retq ; @@ -249,6 +257,7 @@ define void @TestFPTruncF128_F80() nounwind strictfp { ; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0 ; X64-AVX-NEXT: callq __trunctfxf2 ; X64-AVX-NEXT: fstpt {{.*}}(%rip) +; X64-AVX-NEXT: wait ; X64-AVX-NEXT: popq %rax ; X64-AVX-NEXT: retq ; @@ -262,6 +271,7 @@ define void @TestFPTruncF128_F80() nounwind strictfp { ; X86-NEXT: calll __trunctfxf2 ; X86-NEXT: addl $16, %esp ; X86-NEXT: fstpt vf80 +; X86-NEXT: wait ; X86-NEXT: addl $12, %esp ; X86-NEXT: retl entry: diff --git a/llvm/test/CodeGen/X86/fp80-strict-scalar-cmp.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar-cmp.ll index 7e5896f04486..1e38b6744f3c 100644 --- a/llvm/test/CodeGen/X86/fp80-strict-scalar-cmp.ll +++ b/llvm/test/CodeGen/X86/fp80-strict-scalar-cmp.ll @@ -8,6 +8,7 @@ define i32 @test_oeq_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; 
X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -28,6 +29,7 @@ define i32 @test_oeq_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovnel %esi, %eax ; X87-64-NEXT: cmovpl %esi, %eax ; X87-64-NEXT: retq @@ -44,6 +46,7 @@ define i32 @test_ogt_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -64,6 +67,7 @@ define i32 @test_ogt_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovbel %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( @@ -79,6 +83,7 @@ define i32 @test_oge_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -99,6 +104,7 @@ define i32 @test_oge_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovbl %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( @@ -114,6 +120,7 @@ define i32 @test_olt_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -134,6 +141,7 @@ define i32 
@test_olt_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovbel %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( @@ -149,6 +157,7 @@ define i32 @test_ole_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -169,6 +178,7 @@ define i32 @test_ole_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovbl %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( @@ -184,6 +194,7 @@ define i32 @test_one_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -204,6 +215,7 @@ define i32 @test_one_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovel %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( @@ -219,6 +231,7 @@ define i32 @test_ord_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -239,6 +252,7 @@ define i32 @test_ord_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt 
{{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovpl %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( @@ -254,6 +268,7 @@ define i32 @test_ueq_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -274,6 +289,7 @@ define i32 @test_ueq_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovnel %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( @@ -289,6 +305,7 @@ define i32 @test_ugt_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -309,6 +326,7 @@ define i32 @test_ugt_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovael %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( @@ -324,6 +342,7 @@ define i32 @test_uge_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -344,6 +363,7 @@ define i32 @test_uge_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; 
X87-64-NEXT: wait ; X87-64-NEXT: cmoval %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( @@ -359,6 +379,7 @@ define i32 @test_ult_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -379,6 +400,7 @@ define i32 @test_ult_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovael %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( @@ -394,6 +416,7 @@ define i32 @test_ule_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -414,6 +437,7 @@ define i32 @test_ule_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmoval %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( @@ -429,6 +453,7 @@ define i32 @test_une_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -449,6 +474,7 @@ define i32 @test_une_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovnel %edi, %eax ; X87-64-NEXT: cmovpl %edi, %eax ; 
X87-64-NEXT: retq @@ -465,6 +491,7 @@ define i32 @test_uno_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fucompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -485,6 +512,7 @@ define i32 @test_uno_q(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fucompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovnpl %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmp.x86_fp80( @@ -500,6 +528,7 @@ define i32 @test_oeq_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -520,6 +549,7 @@ define i32 @test_oeq_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovnel %esi, %eax ; X87-64-NEXT: cmovpl %esi, %eax ; X87-64-NEXT: retq @@ -536,6 +566,7 @@ define i32 @test_ogt_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -556,6 +587,7 @@ define i32 @test_ogt_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovbel %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80( @@ -571,6 +603,7 @@ define i32 @test_oge_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; 
X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -591,6 +624,7 @@ define i32 @test_oge_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovbl %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80( @@ -606,6 +640,7 @@ define i32 @test_olt_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -626,6 +661,7 @@ define i32 @test_olt_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovbel %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80( @@ -641,6 +677,7 @@ define i32 @test_ole_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -661,6 +698,7 @@ define i32 @test_ole_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovbl %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80( @@ -676,6 +714,7 @@ define i32 @test_one_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp 
+; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -696,6 +735,7 @@ define i32 @test_one_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovel %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80( @@ -711,6 +751,7 @@ define i32 @test_ord_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -731,6 +772,7 @@ define i32 @test_ord_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovpl %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80( @@ -746,6 +788,7 @@ define i32 @test_ueq_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -766,6 +809,7 @@ define i32 @test_ueq_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovnel %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80( @@ -781,6 +825,7 @@ define i32 @test_ugt_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah 
killed $ax ; X87-32-NEXT: sahf @@ -801,6 +846,7 @@ define i32 @test_ugt_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovael %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80( @@ -816,6 +862,7 @@ define i32 @test_uge_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -836,6 +883,7 @@ define i32 @test_uge_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmoval %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80( @@ -851,6 +899,7 @@ define i32 @test_ult_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -871,6 +920,7 @@ define i32 @test_ult_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovael %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80( @@ -886,6 +936,7 @@ define i32 @test_ule_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -906,6 +957,7 @@ define i32 @test_ule_s(i32 %a, i32 %b, 
x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmoval %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80( @@ -921,6 +973,7 @@ define i32 @test_une_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -941,6 +994,7 @@ define i32 @test_une_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovnel %edi, %eax ; X87-64-NEXT: cmovpl %edi, %eax ; X87-64-NEXT: retq @@ -957,6 +1011,7 @@ define i32 @test_uno_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fldt {{[0-9]+}}(%esp) ; X87-32-NEXT: fcompp +; X87-32-NEXT: wait ; X87-32-NEXT: fnstsw %ax ; X87-32-NEXT: # kill: def $ah killed $ah killed $ax ; X87-32-NEXT: sahf @@ -977,6 +1032,7 @@ define i32 @test_uno_s(i32 %a, i32 %b, x86_fp80 %f1, x86_fp80 %f2) #0 { ; X87-64-NEXT: fldt {{[0-9]+}}(%rsp) ; X87-64-NEXT: fcompi %st(1), %st ; X87-64-NEXT: fstp %st(0) +; X87-64-NEXT: wait ; X87-64-NEXT: cmovnpl %esi, %eax ; X87-64-NEXT: retq %cond = call i1 @llvm.experimental.constrained.fcmps.x86_fp80( diff --git a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll index 5f03917f56ae..cf4a51fd6920 100644 --- a/llvm/test/CodeGen/X86/fp80-strict-scalar.ll +++ b/llvm/test/CodeGen/X86/fp80-strict-scalar.ll @@ -38,6 +38,7 @@ define x86_fp80 @fadd_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: faddp %st, %st(1) +; X86-NEXT: wait ; X86-NEXT: retl ; ; X64-LABEL: 
fadd_fp80: @@ -45,6 +46,7 @@ define x86_fp80 @fadd_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: faddp %st, %st(1) +; X64-NEXT: wait ; X64-NEXT: retq %ret = call x86_fp80 @llvm.experimental.constrained.fadd.x86_fp80(x86_fp80 %a, x86_fp80 %b, metadata !"round.dynamic", @@ -58,6 +60,7 @@ define x86_fp80 @fsub_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fsubp %st, %st(1) +; X86-NEXT: wait ; X86-NEXT: retl ; ; X64-LABEL: fsub_fp80: @@ -65,6 +68,7 @@ define x86_fp80 @fsub_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fsubp %st, %st(1) +; X64-NEXT: wait ; X64-NEXT: retq %ret = call x86_fp80 @llvm.experimental.constrained.fsub.x86_fp80(x86_fp80 %a, x86_fp80 %b, metadata !"round.dynamic", @@ -78,6 +82,7 @@ define x86_fp80 @fmul_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fmulp %st, %st(1) +; X86-NEXT: wait ; X86-NEXT: retl ; ; X64-LABEL: fmul_fp80: @@ -85,6 +90,7 @@ define x86_fp80 @fmul_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fmulp %st, %st(1) +; X64-NEXT: wait ; X64-NEXT: retq %ret = call x86_fp80 @llvm.experimental.constrained.fmul.x86_fp80(x86_fp80 %a, x86_fp80 %b, metadata !"round.dynamic", @@ -98,6 +104,7 @@ define x86_fp80 @fdiv_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fdivp %st, %st(1) +; X86-NEXT: wait ; X86-NEXT: retl ; ; X64-LABEL: fdiv_fp80: @@ -105,6 +112,7 @@ define x86_fp80 @fdiv_fp80(x86_fp80 %a, x86_fp80 %b) nounwind strictfp { ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fdivp %st, %st(1) +; X64-NEXT: wait 
; X64-NEXT: retq %ret = call x86_fp80 @llvm.experimental.constrained.fdiv.x86_fp80(x86_fp80 %a, x86_fp80 %b, metadata !"round.dynamic", @@ -116,12 +124,14 @@ define x86_fp80 @fpext_f32_to_fp80(float %a) nounwind strictfp { ; X86-LABEL: fpext_f32_to_fp80: ; X86: # %bb.0: ; X86-NEXT: flds {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: retl ; ; X64-LABEL: fpext_f32_to_fp80: ; X64: # %bb.0: ; X64-NEXT: movss %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: flds -{{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: retq %ret = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f32(float %a, metadata !"fpexcept.strict") #0 @@ -132,12 +142,14 @@ define x86_fp80 @fpext_f64_to_fp80(double %a) nounwind strictfp { ; X86-LABEL: fpext_f64_to_fp80: ; X86: # %bb.0: ; X86-NEXT: fldl {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: retl ; ; X64-LABEL: fpext_f64_to_fp80: ; X64: # %bb.0: ; X64-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) ; X64-NEXT: fldl -{{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: retq %ret = call x86_fp80 @llvm.experimental.constrained.fpext.x86_fp80.f64(double %a, metadata !"fpexcept.strict") #0 @@ -151,6 +163,7 @@ define float @fptrunc_fp80_to_f32(x86_fp80 %a) nounwind strictfp { ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fstps (%esp) ; X86-NEXT: flds (%esp) +; X86-NEXT: wait ; X86-NEXT: popl %eax ; X86-NEXT: retl ; @@ -158,6 +171,7 @@ define float @fptrunc_fp80_to_f32(x86_fp80 %a) nounwind strictfp { ; X64: # %bb.0: ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fstps -{{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; X64-NEXT: retq %ret = call float @llvm.experimental.constrained.fptrunc.f32.x86_fp80(x86_fp80 %a, @@ -176,6 +190,7 @@ define double @fptrunc_fp80_to_f64(x86_fp80 %a) nounwind strictfp { ; X86-NEXT: fldt 8(%ebp) ; X86-NEXT: fstpl (%esp) ; X86-NEXT: fldl (%esp) +; X86-NEXT: wait ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: retl @@ -184,6 +199,7 @@ define double @fptrunc_fp80_to_f64(x86_fp80 %a) nounwind 
strictfp { ; X64: # %bb.0: ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fstpl -{{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; X64-NEXT: retq %ret = call double @llvm.experimental.constrained.fptrunc.f64.x86_fp80(x86_fp80 %a, @@ -197,12 +213,14 @@ define x86_fp80 @fsqrt_fp80(x86_fp80 %a) nounwind strictfp { ; X86: # %bb.0: ; X86-NEXT: fldt {{[0-9]+}}(%esp) ; X86-NEXT: fsqrt +; X86-NEXT: wait ; X86-NEXT: retl ; ; X64-LABEL: fsqrt_fp80: ; X64: # %bb.0: ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: fsqrt +; X64-NEXT: wait ; X64-NEXT: retq %ret = call x86_fp80 @llvm.experimental.constrained.sqrt.x86_fp80(x86_fp80 %a, metadata !"round.dynamic", @@ -216,6 +234,7 @@ define i1 @fp80_to_sint1(x86_fp80 %x) #0 { ; X86-NEXT: subl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl $3072, %eax # imm = 0xC00 @@ -231,6 +250,7 @@ define i1 @fp80_to_sint1(x86_fp80 %x) #0 { ; X64-LABEL: fp80_to_sint1: ; X64: # %bb.0: ; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: orl $3072, %eax # imm = 0xC00 @@ -251,6 +271,7 @@ define i8 @fp80_to_sint8(x86_fp80 %x) #0 { ; X86-NEXT: subl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl $3072, %eax # imm = 0xC00 @@ -266,6 +287,7 @@ define i8 @fp80_to_sint8(x86_fp80 %x) #0 { ; X64-LABEL: fp80_to_sint8: ; X64: # %bb.0: ; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: orl $3072, %eax # imm = 0xC00 @@ -286,6 +308,7 @@ define i16 @fp80_to_sint16(x86_fp80 %x) #0 { ; X86-NEXT: subl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: fldt {{[0-9]+}}(%esp) 
+; X86-NEXT: wait ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl $3072, %eax # imm = 0xC00 @@ -301,6 +324,7 @@ define i16 @fp80_to_sint16(x86_fp80 %x) #0 { ; X64-LABEL: fp80_to_sint16: ; X64: # %bb.0: ; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: orl $3072, %eax # imm = 0xC00 @@ -321,6 +345,7 @@ define i32 @fp80_to_sint32(x86_fp80 %x) #0 { ; X86-NEXT: subl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: fnstcw (%esp) ; X86-NEXT: movzwl (%esp), %eax ; X86-NEXT: orl $3072, %eax # imm = 0xC00 @@ -336,6 +361,7 @@ define i32 @fp80_to_sint32(x86_fp80 %x) #0 { ; X64-LABEL: fp80_to_sint32: ; X64: # %bb.0: # %entry ; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: orl $3072, %eax # imm = 0xC00 @@ -362,6 +388,7 @@ define i64 @fp80_to_sint64(x86_fp80 %x) #0 { ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $16, %esp ; X86-NEXT: fldt 8(%ebp) +; X86-NEXT: wait ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl $3072, %eax # imm = 0xC00 @@ -379,6 +406,7 @@ define i64 @fp80_to_sint64(x86_fp80 %x) #0 { ; X64-LABEL: fp80_to_sint64: ; X64: # %bb.0: ; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: orl $3072, %eax # imm = 0xC00 @@ -399,6 +427,7 @@ define i1 @fp80_to_uint1(x86_fp80 %x) #0 { ; X86-NEXT: subl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl $3072, %eax # imm = 0xC00 @@ -414,6 +443,7 @@ define i1 @fp80_to_uint1(x86_fp80 %x) #0 { ; X64-LABEL: fp80_to_uint1: ; X64: # %bb.0: ; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; 
X64-NEXT: wait ; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: orl $3072, %eax # imm = 0xC00 @@ -434,6 +464,7 @@ define i8 @fp80_to_uint8(x86_fp80 %x) #0 { ; X86-NEXT: subl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl $3072, %eax # imm = 0xC00 @@ -449,6 +480,7 @@ define i8 @fp80_to_uint8(x86_fp80 %x) #0 { ; X64-LABEL: fp80_to_uint8: ; X64: # %bb.0: ; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: orl $3072, %eax # imm = 0xC00 @@ -469,6 +501,7 @@ define i16 @fp80_to_uint16(x86_fp80 %x) #0 { ; X86-NEXT: subl $8, %esp ; X86-NEXT: .cfi_def_cfa_offset 12 ; X86-NEXT: fldt {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: fnstcw (%esp) ; X86-NEXT: movzwl (%esp), %eax ; X86-NEXT: orl $3072, %eax # imm = 0xC00 @@ -485,6 +518,7 @@ define i16 @fp80_to_uint16(x86_fp80 %x) #0 { ; X64-LABEL: fp80_to_uint16: ; X64: # %bb.0: ; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: orl $3072, %eax # imm = 0xC00 @@ -511,6 +545,7 @@ define i32 @fp80_to_uint32(x86_fp80 %x) #0 { ; X86-NEXT: andl $-8, %esp ; X86-NEXT: subl $16, %esp ; X86-NEXT: fldt 8(%ebp) +; X86-NEXT: wait ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: orl $3072, %eax # imm = 0xC00 @@ -527,6 +562,7 @@ define i32 @fp80_to_uint32(x86_fp80 %x) #0 { ; X64-LABEL: fp80_to_uint32: ; X64: # %bb.0: ; X64-NEXT: fldt {{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %eax ; X64-NEXT: orl $3072, %eax # imm = 0xC00 @@ -554,6 +590,7 @@ define i64 @fp80_to_uint64(x86_fp80 %x) #0 { ; X86-NEXT: fldt 8(%ebp) ; X86-NEXT: flds {{\.LCPI.*}} ; X86-NEXT: fcom %st(1) +; X86-NEXT: 
wait ; X86-NEXT: fnstsw %ax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: # kill: def $ah killed $ah killed $ax @@ -568,6 +605,7 @@ define i64 @fp80_to_uint64(x86_fp80 %x) #0 { ; X86-NEXT: .LBB18_2: ; X86-NEXT: fstp %st(1) ; X86-NEXT: fsubrp %st, %st(1) +; X86-NEXT: wait ; X86-NEXT: fnstcw {{[0-9]+}}(%esp) ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -588,14 +626,17 @@ define i64 @fp80_to_uint64(x86_fp80 %x) #0 { ; X64: # %bb.0: ; X64-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-NEXT: flds {{.*}}(%rip) +; X64-NEXT: wait ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: fcomi %st(1), %st +; X64-NEXT: wait ; X64-NEXT: setbe %al ; X64-NEXT: fldz ; X64-NEXT: fxch %st(1) ; X64-NEXT: fcmovnbe %st(1), %st ; X64-NEXT: fstp %st(1) ; X64-NEXT: fsubrp %st, %st(1) +; X64-NEXT: wait ; X64-NEXT: fnstcw -{{[0-9]+}}(%rsp) ; X64-NEXT: movzwl -{{[0-9]+}}(%rsp), %ecx ; X64-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -622,6 +663,7 @@ define x86_fp80 @sint1_to_fp80(i1 %x) #0 { ; X86-NEXT: movsbl %al, %eax ; X86-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X86-NEXT: filds {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: popl %eax ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl @@ -633,6 +675,7 @@ define x86_fp80 @sint1_to_fp80(i1 %x) #0 { ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp) ; X64-NEXT: filds -{{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: retq %result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i1(i1 %x, metadata !"round.dynamic", @@ -648,6 +691,7 @@ define x86_fp80 @sint8_to_fp80(i8 %x) #0 { ; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X86-NEXT: filds {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: popl %eax ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl @@ -657,6 +701,7 @@ define x86_fp80 @sint8_to_fp80(i8 %x) #0 { ; X64-NEXT: movsbl %dil, %eax ; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp) ; X64-NEXT: filds -{{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: retq %result = call x86_fp80 
@llvm.experimental.constrained.sitofp.x86_fp80.i8(i8 %x, metadata !"round.dynamic", @@ -672,6 +717,7 @@ define x86_fp80 @sint16_to_fp80(i16 %x) #0 { ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X86-NEXT: filds {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: popl %eax ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl @@ -680,6 +726,7 @@ define x86_fp80 @sint16_to_fp80(i16 %x) #0 { ; X64: # %bb.0: ; X64-NEXT: movw %di, -{{[0-9]+}}(%rsp) ; X64-NEXT: filds -{{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: retq %result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i16(i16 %x, metadata !"round.dynamic", @@ -695,6 +742,7 @@ define x86_fp80 @sint32_to_fp80(i32 %x) #0 { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: fildl (%esp) +; X86-NEXT: wait ; X86-NEXT: popl %eax ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl @@ -703,6 +751,7 @@ define x86_fp80 @sint32_to_fp80(i32 %x) #0 { ; X64: # %bb.0: ; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) ; X64-NEXT: fildl -{{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: retq %result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i32(i32 %x, metadata !"round.dynamic", @@ -714,12 +763,14 @@ define x86_fp80 @sint64_to_fp80(i64 %x) #0 { ; X86-LABEL: sint64_to_fp80: ; X86: # %bb.0: ; X86-NEXT: fildll {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: retl ; ; X64-LABEL: sint64_to_fp80: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) ; X64-NEXT: fildll -{{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: retq %result = call x86_fp80 @llvm.experimental.constrained.sitofp.x86_fp80.i64(i64 %x, metadata !"round.dynamic", @@ -737,6 +788,7 @@ define x86_fp80 @uint1_to_fp80(i1 %x) #0 { ; X86-NEXT: movzbl %al, %eax ; X86-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X86-NEXT: filds {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: popl %eax ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl @@ -746,6 +798,7 @@ define x86_fp80 @uint1_to_fp80(i1 %x) #0 { ; X64-NEXT: 
andl $1, %edi ; X64-NEXT: movw %di, -{{[0-9]+}}(%rsp) ; X64-NEXT: filds -{{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: retq %result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i1(i1 %x, metadata !"round.dynamic", @@ -761,6 +814,7 @@ define x86_fp80 @uint8_to_fp80(i8 %x) #0 { ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movw %ax, {{[0-9]+}}(%esp) ; X86-NEXT: filds {{[0-9]+}}(%esp) +; X86-NEXT: wait ; X86-NEXT: popl %eax ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl @@ -770,6 +824,7 @@ define x86_fp80 @uint8_to_fp80(i8 %x) #0 { ; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: movw %ax, -{{[0-9]+}}(%rsp) ; X64-NEXT: filds -{{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: retq %result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i8(i8 %x, metadata !"round.dynamic", @@ -785,6 +840,7 @@ define x86_fp80 @uint16_to_fp80(i16 %x) #0 { ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: fildl (%esp) +; X86-NEXT: wait ; X86-NEXT: popl %eax ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: retl @@ -794,6 +850,7 @@ define x86_fp80 @uint16_to_fp80(i16 %x) #0 { ; X64-NEXT: movzwl %di, %eax ; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ; X64-NEXT: fildl -{{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: retq %result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i16(i16 %x, metadata !"round.dynamic", @@ -815,6 +872,7 @@ define x86_fp80 @uint32_to_fp80(i32 %x) #0 { ; X86-NEXT: movl %eax, (%esp) ; X86-NEXT: movl $0, {{[0-9]+}}(%esp) ; X86-NEXT: fildll (%esp) +; X86-NEXT: wait ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: .cfi_def_cfa %esp, 4 @@ -825,6 +883,7 @@ define x86_fp80 @uint32_to_fp80(i32 %x) #0 { ; X64-NEXT: movl %edi, %eax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) ; X64-NEXT: fildll -{{[0-9]+}}(%rsp) +; X64-NEXT: wait ; X64-NEXT: retq %result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i32(i32 %x, metadata !"round.dynamic", @@ -849,6 +908,7 @@ define x86_fp80 
@uint64_to_fp80(i64 %x) #0 { ; X86-NEXT: shrl $31, %ecx ; X86-NEXT: fildll (%esp) ; X86-NEXT: fadds {{\.LCPI.*}}(,%ecx,4) +; X86-NEXT: wait ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: .cfi_def_cfa %esp, 4 @@ -862,6 +922,7 @@ define x86_fp80 @uint64_to_fp80(i64 %x) #0 { ; X64-NEXT: sets %al ; X64-NEXT: fildll -{{[0-9]+}}(%rsp) ; X64-NEXT: fadds {{\.LCPI.*}}(,%rax,4) +; X64-NEXT: wait ; X64-NEXT: retq %result = call x86_fp80 @llvm.experimental.constrained.uitofp.x86_fp80.i64(i64 %x, metadata !"round.dynamic", diff --git a/llvm/test/CodeGen/X86/vec-strict-128.ll b/llvm/test/CodeGen/X86/vec-strict-128.ll index 5aa0802adf03..98162a1da9a9 100644 --- a/llvm/test/CodeGen/X86/vec-strict-128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-128.ll @@ -234,6 +234,7 @@ define <4 x float> @f13(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: calll fmaf ; SSE-X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE-X86-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp) @@ -245,6 +246,7 @@ define <4 x float> @f13(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { ; SSE-X86-NEXT: movss %xmm0, (%esp) ; SSE-X86-NEXT: calll fmaf ; SSE-X86-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE-X86-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE-X86-NEXT: movss %xmm0, {{[0-9]+}}(%esp) @@ -269,8 +271,10 @@ define <4 x float> @f13(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { ; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp) ; SSE-X86-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: calll fmaf ; SSE-X86-NEXT: fstps {{[0-9]+}}(%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movss {{.*#+}} xmm0 = 
mem[0],zero,zero,zero ; SSE-X86-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-X86-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] @@ -359,8 +363,10 @@ define <2 x double> @f14(<2 x double> %a, <2 x double> %b, <2 x double> %c) #0 { ; SSE-X86-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload ; SSE-X86-NEXT: movhps %xmm0, (%esp) ; SSE-X86-NEXT: fstpl {{[0-9]+}}(%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: calll fma ; SSE-X86-NEXT: fstpl {{[0-9]+}}(%esp) +; SSE-X86-NEXT: wait ; SSE-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-X86-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; SSE-X86-NEXT: movl %ebp, %esp diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll index fb1ec511fcca..205ea7f66720 100644 --- a/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll @@ -54,6 +54,7 @@ define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 { ; SSE-32-NEXT: movhps %xmm0, {{[0-9]+}}(%esp) ; SSE-32-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 @@ -62,6 +63,7 @@ define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 { ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw (%esp) ; SSE-32-NEXT: movzwl (%esp), %eax ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 @@ -103,6 +105,7 @@ define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 { ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fldl (%esp) ; AVX-32-NEXT: fisttpll (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, 
%xmm0 @@ -137,6 +140,7 @@ define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 { ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fldl (%esp) ; AVX512F-32-NEXT: fisttpll (%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 @@ -171,6 +175,7 @@ define <2 x i64> @strict_vector_fptosi_v2f64_to_v2i64(<2 x double> %a) #0 { ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl (%esp) ; AVX512VL-32-NEXT: fisttpll (%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 @@ -230,6 +235,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 { ; SSE-32-NEXT: movsd %xmm4, {{[0-9]+}}(%esp) ; SSE-32-NEXT: setae %al ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -247,6 +253,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 { ; SSE-32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) ; SSE-32-NEXT: setae %cl ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw (%esp) ; SSE-32-NEXT: movzwl (%esp), %edx ; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00 @@ -329,6 +336,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 { ; AVX-32-NEXT: vmovsd %xmm3, (%esp) ; AVX-32-NEXT: fldl (%esp) ; AVX-32-NEXT: fisttpll (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %al ; AVX-32-NEXT: movzbl %al, %eax ; AVX-32-NEXT: shll $31, %eax @@ -342,6 +350,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 { ; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) ; 
AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %cl ; AVX-32-NEXT: movzbl %cl, %ecx ; AVX-32-NEXT: shll $31, %ecx @@ -410,6 +419,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 { ; AVX512F-32-NEXT: vmovsd %xmm1, (%esp) ; AVX512F-32-NEXT: fldl (%esp) ; AVX512F-32-NEXT: fisttpll (%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: setae %al ; AVX512F-32-NEXT: shll $31, %eax ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax @@ -422,6 +432,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 { ; AVX512F-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: setae %cl ; AVX512F-32-NEXT: shll $31, %ecx ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx @@ -466,6 +477,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm1, (%esp) ; AVX512VL-32-NEXT: fldl (%esp) ; AVX512VL-32-NEXT: fisttpll (%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax @@ -478,6 +490,7 @@ define <2 x i64> @strict_vector_fptoui_v2f64_to_v2i64(<2 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: setae %cl ; AVX512VL-32-NEXT: shll $31, %ecx ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx @@ -531,6 +544,7 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 { ; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; SSE-32-NEXT: flds {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 @@ -539,6 +553,7 @@ define <2 x i64> 
@strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 { ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) ; SSE-32-NEXT: flds {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw (%esp) ; SSE-32-NEXT: movzwl (%esp), %eax ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 @@ -580,6 +595,7 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 { ; AVX-32-NEXT: fisttpll (%esp) ; AVX-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 @@ -614,6 +630,7 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 { ; AVX512F-32-NEXT: fisttpll (%esp) ; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 @@ -648,6 +665,7 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 { ; AVX512VL-32-NEXT: fisttpll (%esp) ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 @@ -723,6 +741,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 { ; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp) ; SSE-32-NEXT: setae %al ; SSE-32-NEXT: flds {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -740,6 +759,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x 
float> %a) #0 { ; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; SSE-32-NEXT: setae %cl ; SSE-32-NEXT: flds {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw (%esp) ; SSE-32-NEXT: movzwl (%esp), %edx ; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00 @@ -822,6 +842,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 { ; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) ; AVX-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %al ; AVX-32-NEXT: movzbl %al, %eax ; AVX-32-NEXT: shll $31, %eax @@ -835,6 +856,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 { ; AVX-32-NEXT: vmovss %xmm0, (%esp) ; AVX-32-NEXT: flds (%esp) ; AVX-32-NEXT: fisttpll (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %cl ; AVX-32-NEXT: movzbl %cl, %ecx ; AVX-32-NEXT: shll $31, %ecx @@ -903,6 +925,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 { ; AVX512F-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: setae %al ; AVX512F-32-NEXT: shll $31, %eax ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %eax @@ -915,6 +938,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 { ; AVX512F-32-NEXT: vmovss %xmm0, (%esp) ; AVX512F-32-NEXT: flds (%esp) ; AVX512F-32-NEXT: fisttpll (%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: setae %cl ; AVX512F-32-NEXT: shll $31, %ecx ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx @@ -959,6 +983,7 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm1, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax @@ -971,6 +996,7 @@ define <2 x i64> 
@strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm0, (%esp) ; AVX512VL-32-NEXT: flds (%esp) ; AVX512VL-32-NEXT: fisttpll (%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: setae %cl ; AVX512VL-32-NEXT: shll $31, %ecx ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx @@ -1128,6 +1154,7 @@ define <2 x i32> @strict_vector_fptoui_v2f64_to_v2i32(<2 x double> %a) #0 { ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fldl (%esp) ; AVX-32-NEXT: fisttpll (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vpinsrd $1, (%esp), %xmm0, %xmm0 ; AVX-32-NEXT: movl %ebp, %esp @@ -1283,6 +1310,7 @@ define <2 x i32> @strict_vector_fptoui_v2f32_to_v2i32(<2 x float> %a) #0 { ; AVX-32-NEXT: fisttpll (%esp) ; AVX-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-32-NEXT: movl %ebp, %esp @@ -1764,6 +1792,7 @@ define <2 x i1> @strict_vector_fptosi_v2f64_to_v2i1(<2 x double> %a) #0 { ; SSE-32-NEXT: movhps %xmm0, {{[0-9]+}}(%esp) ; SSE-32-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1772,6 +1801,7 @@ define <2 x i1> @strict_vector_fptosi_v2f64_to_v2i1(<2 x double> %a) #0 { ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw (%esp) ; SSE-32-NEXT: movzwl (%esp), %eax ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 @@ -1813,6 +1843,7 @@ define <2 x i1> @strict_vector_fptosi_v2f64_to_v2i1(<2 x double> %a) #0 { ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fldl (%esp) ; AVX-32-NEXT: fisttpll (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovd {{.*#+}} 
xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 @@ -1896,6 +1927,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 { ; SSE-32-NEXT: movsd %xmm4, {{[0-9]+}}(%esp) ; SSE-32-NEXT: setae %al ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -1913,6 +1945,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 { ; SSE-32-NEXT: movsd %xmm0, {{[0-9]+}}(%esp) ; SSE-32-NEXT: setae %cl ; SSE-32-NEXT: fldl {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw (%esp) ; SSE-32-NEXT: movzwl (%esp), %edx ; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00 @@ -1995,6 +2028,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 { ; AVX-32-NEXT: vmovsd %xmm3, (%esp) ; AVX-32-NEXT: fldl (%esp) ; AVX-32-NEXT: fisttpll (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %al ; AVX-32-NEXT: movzbl %al, %eax ; AVX-32-NEXT: shll $31, %eax @@ -2008,6 +2042,7 @@ define <2 x i1> @strict_vector_fptoui_v2f64_to_v2i1(<2 x double> %a) #0 { ; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %cl ; AVX-32-NEXT: movzbl %cl, %ecx ; AVX-32-NEXT: shll $31, %ecx @@ -2111,6 +2146,7 @@ define <2 x i1> @strict_vector_fptosi_v2f32_to_v2i1(<2 x float> %a) #0 { ; SSE-32-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; SSE-32-NEXT: flds {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 @@ -2119,6 +2155,7 @@ define <2 x i1> @strict_vector_fptosi_v2f32_to_v2i1(<2 x float> %a) #0 { ; SSE-32-NEXT: fistpll {{[0-9]+}}(%esp) ; SSE-32-NEXT: fldcw {{[0-9]+}}(%esp) 
; SSE-32-NEXT: flds {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw (%esp) ; SSE-32-NEXT: movzwl (%esp), %eax ; SSE-32-NEXT: orl $3072, %eax # imm = 0xC00 @@ -2160,6 +2197,7 @@ define <2 x i1> @strict_vector_fptosi_v2f32_to_v2i1(<2 x float> %a) #0 { ; AVX-32-NEXT: fisttpll (%esp) ; AVX-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 @@ -2265,6 +2303,7 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 { ; SSE-32-NEXT: movss %xmm4, {{[0-9]+}}(%esp) ; SSE-32-NEXT: setae %al ; SSE-32-NEXT: flds {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw {{[0-9]+}}(%esp) ; SSE-32-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; SSE-32-NEXT: orl $3072, %ecx # imm = 0xC00 @@ -2282,6 +2321,7 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 { ; SSE-32-NEXT: movss %xmm0, {{[0-9]+}}(%esp) ; SSE-32-NEXT: setae %cl ; SSE-32-NEXT: flds {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: fnstcw (%esp) ; SSE-32-NEXT: movzwl (%esp), %edx ; SSE-32-NEXT: orl $3072, %edx # imm = 0xC00 @@ -2364,6 +2404,7 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 { ; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) ; AVX-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %al ; AVX-32-NEXT: movzbl %al, %eax ; AVX-32-NEXT: shll $31, %eax @@ -2377,6 +2418,7 @@ define <2 x i1> @strict_vector_fptoui_v2f32_to_v2i1(<2 x float> %a) #0 { ; AVX-32-NEXT: vmovss %xmm0, (%esp) ; AVX-32-NEXT: flds (%esp) ; AVX-32-NEXT: fisttpll (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %cl ; AVX-32-NEXT: movzbl %cl, %ecx ; AVX-32-NEXT: shll $31, %ecx diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll index 
6b7f083a2c87..38f19e9ed2d5 100644 --- a/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-256.ll @@ -57,6 +57,7 @@ define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fldl (%esp) ; AVX-32-NEXT: fisttpll (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 @@ -111,6 +112,7 @@ define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fldl (%esp) ; AVX512F-32-NEXT: fisttpll (%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 @@ -165,6 +167,7 @@ define <4 x i64> @strict_vector_fptosi_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl (%esp) ; AVX512VL-32-NEXT: fisttpll (%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 @@ -236,6 +239,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp) ; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %al ; AVX-32-NEXT: movzbl %al, %eax ; AVX-32-NEXT: shll $31, %eax @@ -252,6 +256,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX-32-NEXT: vmovsd %xmm4, (%esp) ; AVX-32-NEXT: fldl (%esp) ; AVX-32-NEXT: fisttpll (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %cl ; AVX-32-NEXT: movzbl %cl, %ecx ; AVX-32-NEXT: shll $31, 
%ecx @@ -266,6 +271,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp) ; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %dl ; AVX-32-NEXT: movzbl %dl, %edx ; AVX-32-NEXT: shll $31, %edx @@ -279,6 +285,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ; AVX-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 ; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 @@ -385,6 +392,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX512F-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: movl $0, %eax ; AVX512F-32-NEXT: setae %al ; AVX512F-32-NEXT: shll $31, %eax @@ -402,6 +410,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX512F-32-NEXT: vmovsd %xmm4, (%esp) ; AVX512F-32-NEXT: fldl (%esp) ; AVX512F-32-NEXT: fisttpll (%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: setae %cl ; AVX512F-32-NEXT: shll $31, %ecx ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx @@ -415,6 +424,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX512F-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: setae %dl ; AVX512F-32-NEXT: shll $31, %edx ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %edx @@ -427,6 +437,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX512F-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fisttpll 
{{[0-9]+}}(%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512F-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 ; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 @@ -489,6 +500,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: movl $0, %eax ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax @@ -506,6 +518,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm4, (%esp) ; AVX512VL-32-NEXT: fldl (%esp) ; AVX512VL-32-NEXT: fisttpll (%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: setae %cl ; AVX512VL-32-NEXT: shll $31, %ecx ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx @@ -519,6 +532,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm2, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: setae %dl ; AVX512VL-32-NEXT: shll $31, %edx ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx @@ -531,6 +545,7 @@ define <4 x i64> @strict_vector_fptoui_v4f64_to_v4i64(<4 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 ; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 @@ -606,6 +621,7 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) ; AVX-32-NEXT: flds (%esp) ; AVX-32-NEXT: fisttpll (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; 
AVX-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 @@ -659,6 +675,7 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: flds (%esp) ; AVX512F-32-NEXT: fisttpll (%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512F-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX512F-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 @@ -712,6 +729,7 @@ define <4 x i64> @strict_vector_fptosi_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: flds (%esp) ; AVX512VL-32-NEXT: fisttpll (%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 @@ -783,6 +801,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) ; AVX-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %al ; AVX-32-NEXT: movzbl %al, %eax ; AVX-32-NEXT: shll $31, %eax @@ -798,6 +817,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX-32-NEXT: vmovss %xmm3, (%esp) ; AVX-32-NEXT: flds (%esp) ; AVX-32-NEXT: fisttpll (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %cl ; AVX-32-NEXT: movzbl %cl, %ecx ; AVX-32-NEXT: shll $31, %ecx @@ -813,6 +833,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) ; AVX-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: setae %dl ; AVX-32-NEXT: movzbl %dl, %edx ; AVX-32-NEXT: shll $31, %edx @@ -826,6 +847,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) 
#0 { ; AVX-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ; AVX-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 ; AVX-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 @@ -932,6 +954,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX512F-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: movl $0, %eax ; AVX512F-32-NEXT: setae %al ; AVX512F-32-NEXT: shll $31, %eax @@ -948,6 +971,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX512F-32-NEXT: vmovss %xmm2, (%esp) ; AVX512F-32-NEXT: flds (%esp) ; AVX512F-32-NEXT: fisttpll (%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: setae %cl ; AVX512F-32-NEXT: shll $31, %ecx ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx @@ -962,6 +986,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX512F-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: setae %dl ; AVX512F-32-NEXT: shll $31, %edx ; AVX512F-32-NEXT: xorl {{[0-9]+}}(%esp), %edx @@ -974,6 +999,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX512F-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512F-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512F-32-NEXT: wait ; AVX512F-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512F-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 ; AVX512F-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 @@ -1036,6 +1062,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll 
{{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: movl $0, %eax ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax @@ -1052,6 +1079,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm2, (%esp) ; AVX512VL-32-NEXT: flds (%esp) ; AVX512VL-32-NEXT: fisttpll (%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: setae %cl ; AVX512VL-32-NEXT: shll $31, %ecx ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %ecx @@ -1066,6 +1094,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm2, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: setae %dl ; AVX512VL-32-NEXT: shll $31, %edx ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx @@ -1078,6 +1107,7 @@ define <4 x i64> @strict_vector_fptoui_v4f32_to_v4i64(<4 x float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512VL-32-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 ; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll index 3cc9edb83e13..06464ea1cb81 100644 --- a/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-fptoint-512.ll @@ -65,6 +65,7 @@ define <8 x i64> @strict_vector_fptosi_v8f64_to_v8i64(<8 x double> %a) #0 { ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 @@ 
-160,6 +161,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: movl $0, %eax ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax @@ -176,6 +178,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm4, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: movl $0, %eax ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax @@ -190,6 +193,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: movl $0, %eax ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax @@ -206,6 +210,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm4, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: movl $0, %eax ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax @@ -220,6 +225,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: movl $0, %eax ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax @@ -237,6 +243,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm4, (%esp) ; AVX512VL-32-NEXT: fldl (%esp) ; AVX512VL-32-NEXT: fisttpll 
(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: setae %dl ; AVX512VL-32-NEXT: shll $31, %edx ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx @@ -250,6 +257,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm3, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax @@ -262,6 +270,7 @@ define <8 x i64> @strict_vector_fptoui_v8f64_to_v8i64(<8 x double> %a) #0 { ; AVX512VL-32-NEXT: vmovsd %xmm0, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fldl {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 @@ -370,6 +379,7 @@ define <8 x i64> @strict_vector_fptosi_v8f32_to_v8i64(<8 x float> %a) #0 { ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512VL-32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 ; AVX512VL-32-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 @@ -465,6 +475,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: movl $0, %eax ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax @@ -480,6 +491,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; 
AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: movl $0, %eax ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax @@ -495,6 +507,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: movl $0, %eax ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax @@ -511,6 +524,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm4, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: movl $0, %eax ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax @@ -525,6 +539,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm4, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: movl $0, %eax ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax @@ -541,6 +556,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm4, (%esp) ; AVX512VL-32-NEXT: flds (%esp) ; AVX512VL-32-NEXT: fisttpll (%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: setae %dl ; AVX512VL-32-NEXT: shll $31, %edx ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %edx @@ -555,6 +571,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm3, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: setae %al ; AVX512VL-32-NEXT: shll $31, %eax ; AVX512VL-32-NEXT: xorl {{[0-9]+}}(%esp), %eax @@ -567,6 +584,7 @@ define <8 x i64> @strict_vector_fptoui_v8f32_to_v8i64(<8 x 
float> %a) #0 { ; AVX512VL-32-NEXT: vmovss %xmm0, {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: flds {{[0-9]+}}(%esp) ; AVX512VL-32-NEXT: fisttpll {{[0-9]+}}(%esp) +; AVX512VL-32-NEXT: wait ; AVX512VL-32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX512VL-32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ; AVX512VL-32-NEXT: vpinsrd $2, (%esp), %xmm0, %xmm0 diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll index f3c79c2cb397..51d9fe2bb864 100644 --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-128.ll @@ -141,6 +141,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 { ; SSE-32-NEXT: fstps (%esp) ; SSE-32-NEXT: fildll {{[0-9]+}}(%esp) ; SSE-32-NEXT: fstps {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] @@ -177,6 +178,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 { ; SSE41-32-NEXT: fstps (%esp) ; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp) ; SSE41-32-NEXT: fstps {{[0-9]+}}(%esp) +; SSE41-32-NEXT: wait ; SSE41-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE41-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE41-32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] @@ -213,6 +215,7 @@ define <2 x float> @sitofp_v2i64_v2f32(<2 x i64> %x) #0 { ; AVX-32-NEXT: fstps (%esp) ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] ; AVX-32-NEXT: movl %ebp, %esp @@ -277,12 +280,14 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 { ; SSE-32-NEXT: fildll {{[0-9]+}}(%esp) ; SSE-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; SSE-32-NEXT: fstps (%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: 
pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE-32-NEXT: movd %xmm0, %eax ; SSE-32-NEXT: shrl $31, %eax ; SSE-32-NEXT: fildll {{[0-9]+}}(%esp) ; SSE-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; SSE-32-NEXT: fstps {{[0-9]+}}(%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE-32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] @@ -344,12 +349,14 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 { ; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp) ; SSE41-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; SSE41-32-NEXT: fstps (%esp) +; SSE41-32-NEXT: wait ; SSE41-32-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] ; SSE41-32-NEXT: movd %xmm0, %eax ; SSE41-32-NEXT: shrl $31, %eax ; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp) ; SSE41-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; SSE41-32-NEXT: fstps {{[0-9]+}}(%esp) +; SSE41-32-NEXT: wait ; SSE41-32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; SSE41-32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; SSE41-32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] @@ -410,11 +417,13 @@ define <2 x float> @uitofp_v2i64_v2f32(<2 x i64> %x) #0 { ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vextractps $3, %xmm0, %eax ; AVX-32-NEXT: shrl $31, %eax ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; AVX-32-NEXT: fstps (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] ; AVX-32-NEXT: movl %ebp, %esp @@ -1143,6 +1152,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 { ; SSE-32-NEXT: fstpl {{[0-9]+}}(%esp) ; SSE-32-NEXT: fildll {{[0-9]+}}(%esp) ; SSE-32-NEXT: fstpl (%esp) +; SSE-32-NEXT: wait ; SSE-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE-32-NEXT: movhps {{.*#+}} xmm0 = 
xmm0[0,1],mem[0,1] ; SSE-32-NEXT: movl %ebp, %esp @@ -1178,6 +1188,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 { ; SSE41-32-NEXT: fstpl {{[0-9]+}}(%esp) ; SSE41-32-NEXT: fildll {{[0-9]+}}(%esp) ; SSE41-32-NEXT: fstpl (%esp) +; SSE41-32-NEXT: wait ; SSE41-32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; SSE41-32-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; SSE41-32-NEXT: movl %ebp, %esp @@ -1213,6 +1224,7 @@ define <2 x double> @sitofp_v2i64_v2f64(<2 x i64> %x) #0 { ; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fstpl (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; AVX-32-NEXT: movl %ebp, %esp diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll index 60f0c3430125..5fbb8aa864c1 100644 --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-256.ll @@ -651,12 +651,14 @@ define <4 x double> @sitofp_v4i64_v4f64(<4 x i64> %x) #0 { ; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; AVX-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fstpl {{[0-9]+}}(%esp) ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fstpl (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; AVX-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; AVX-32-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 @@ -889,6 +891,7 @@ define <4 x float> @sitofp_v4i64_v4f32(<4 x i64> %x) #0 { ; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fstps (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] ; 
AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] @@ -1008,21 +1011,25 @@ define <4 x float> @uitofp_v4i64_v4f32(<4 x i64> %x) #0 { ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; AVX-32-NEXT: fstps (%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vextractps $3, %xmm0, %eax ; AVX-32-NEXT: shrl $31, %eax ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vextractps $1, %xmm1, %eax ; AVX-32-NEXT: shrl $31, %eax ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vextractps $3, %xmm1, %eax ; AVX-32-NEXT: shrl $31, %eax ; AVX-32-NEXT: fildll {{[0-9]+}}(%esp) ; AVX-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; AVX-32-NEXT: fstps {{[0-9]+}}(%esp) +; AVX-32-NEXT: wait ; AVX-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] ; AVX-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] diff --git a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll index b61c401a6e46..55134b4e0d6d 100644 --- a/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-inttofp-512.ll @@ -290,12 +290,14 @@ define <8 x double> @sitofp_v8i64_v8f64(<8 x i64> %x) #0 { ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; NODQ-32-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstpl (%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; NODQ-32-NEXT: 
vinsertf128 $1, %xmm1, %ymm0, %ymm0 @@ -303,12 +305,14 @@ define <8 x double> @sitofp_v8i64_v8f64(<8 x i64> %x) #0 { ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero ; NODQ-32-NEXT: vmovhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstpl {{[0-9]+}}(%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero ; NODQ-32-NEXT: vmovhps {{.*#+}} xmm2 = xmm2[0,1],mem[0,1] ; NODQ-32-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 @@ -422,6 +426,7 @@ define <8 x float> @sitofp_v8i64_v8f32(<8 x i64> %x) #0 { ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] @@ -434,6 +439,7 @@ define <8 x float> @sitofp_v8i64_v8f32(<8 x i64> %x) #0 { ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fstps (%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3] ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3] @@ -514,41 +520,49 @@ define <8 x float> @uitofp_v8i64_v8f32(<8 x i64> %x) #0 { ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; NODQ-32-NEXT: fstps (%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vextractps $3, %xmm0, %eax ; NODQ-32-NEXT: shrl $31, %eax ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vextractps $1, %xmm3, 
%eax ; NODQ-32-NEXT: shrl $31, %eax ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vextractps $3, %xmm3, %eax ; NODQ-32-NEXT: shrl $31, %eax ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vextractps $1, %xmm2, %eax ; NODQ-32-NEXT: shrl $31, %eax ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vextractps $3, %xmm2, %eax ; NODQ-32-NEXT: shrl $31, %eax ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vextractps $1, %xmm1, %eax ; NODQ-32-NEXT: shrl $31, %eax ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vextractps $3, %xmm1, %eax ; NODQ-32-NEXT: shrl $31, %eax ; NODQ-32-NEXT: fildll {{[0-9]+}}(%esp) ; NODQ-32-NEXT: fadds {{\.LCPI.*}}(,%eax,4) ; NODQ-32-NEXT: fstps {{[0-9]+}}(%esp) +; NODQ-32-NEXT: wait ; NODQ-32-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3] ; NODQ-32-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3] diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index cd8a06f71d02..7c35ccebefb7 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -92,6 +92,7 @@ define <3 x double> @constrained_vector_fdiv_v3f64() #0 { ; CHECK-NEXT: movapd %xmm0, %xmm1 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; 
CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fdiv_v3f64: @@ -292,6 +293,7 @@ define <3 x double> @constrained_vector_frem_v3f64() #0 { ; CHECK-NEXT: callq fmod ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -484,6 +486,7 @@ define <3 x double> @constrained_vector_fmul_v3f64() #0 { ; CHECK-NEXT: movapd %xmm0, %xmm1 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fmul_v3f64: @@ -621,6 +624,7 @@ define <3 x double> @constrained_vector_fadd_v3f64() #0 { ; CHECK-NEXT: movapd %xmm0, %xmm1 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fadd_v3f64: @@ -760,6 +764,7 @@ define <3 x double> @constrained_vector_fsub_v3f64() #0 { ; CHECK-NEXT: movapd %xmm0, %xmm1 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fsub_v3f64: @@ -892,6 +897,7 @@ define <3 x double> @constrained_vector_sqrt_v3f64() #0 { ; CHECK-NEXT: movapd %xmm0, %xmm1 ; CHECK-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_sqrt_v3f64: @@ -1077,6 +1083,7 @@ define <3 x double> @constrained_vector_pow_v3f64() #0 { ; CHECK-NEXT: callq pow ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -1333,6 +1340,7 @@ define <3 x double> 
@constrained_vector_powi_v3f64() #0 { ; CHECK-NEXT: callq __powidf2 ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -1570,6 +1578,7 @@ define <3 x double> @constrained_vector_sin_v3f64() #0 { ; CHECK-NEXT: callq sin ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -1794,6 +1803,7 @@ define <3 x double> @constrained_vector_cos_v3f64() #0 { ; CHECK-NEXT: callq cos ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -2018,6 +2028,7 @@ define <3 x double> @constrained_vector_exp_v3f64() #0 { ; CHECK-NEXT: callq exp ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -2242,6 +2253,7 @@ define <3 x double> @constrained_vector_exp2_v3f64() #0 { ; CHECK-NEXT: callq exp2 ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -2466,6 +2478,7 @@ define <3 x double> @constrained_vector_log_v3f64() #0 { ; CHECK-NEXT: callq log ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload 
; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -2690,6 +2703,7 @@ define <3 x double> @constrained_vector_log10_v3f64() #0 { ; CHECK-NEXT: callq log10 ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -2914,6 +2928,7 @@ define <3 x double> @constrained_vector_log2_v3f64() #0 { ; CHECK-NEXT: callq log2 ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -3116,6 +3131,7 @@ define <3 x double> @constrained_vector_rint_v3f64() #0 { ; CHECK-NEXT: callq rint ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -3286,6 +3302,7 @@ define <3 x double> @constrained_vector_nearby_v3f64() #0 { ; CHECK-NEXT: callq nearbyint ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -3492,6 +3509,7 @@ define <3 x double> @constrained_vector_max_v3f64() #0 { ; CHECK-NEXT: callq fmax ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -3742,6 +3760,7 @@ define <3 x double> @constrained_vector_min_v3f64() #0 { ; CHECK-NEXT: callq 
fmin ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -5549,6 +5568,7 @@ define <3 x double> @constrained_vector_fpext_v3f32() #0 { ; CHECK-NEXT: cvtss2sd %xmm2, %xmm2 ; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: retq ; ; AVX-LABEL: constrained_vector_fpext_v3f32: @@ -5694,6 +5714,7 @@ define <3 x double> @constrained_vector_ceil_v3f64() #0 { ; CHECK-NEXT: callq ceil ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -5822,6 +5843,7 @@ define <3 x double> @constrained_vector_floor_v3f64() #0 { ; CHECK-NEXT: callq floor ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -5972,6 +5994,7 @@ define <3 x double> @constrained_vector_round_v3f64() #0 { ; CHECK-NEXT: callq round ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -6112,6 +6135,7 @@ define <3 x double> @constrained_vector_trunc_v3f64() #0 { ; CHECK-NEXT: callq trunc ; CHECK-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl {{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movsd (%rsp), %xmm0 # 8-byte Reload ; CHECK-NEXT: # xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload @@ -6334,6 +6358,7 
@@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 { ; CHECK-NEXT: cvtsi2sd %eax, %xmm0 ; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movapd %xmm2, %xmm0 ; CHECK-NEXT: retq ; @@ -6401,6 +6426,7 @@ define <3 x double> @constrained_vector_sitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; CHECK-NEXT: cvtsi2sd %rdx, %xmm2 ; CHECK-NEXT: movsd %xmm2, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_sitofp_v3f64_v3i64: @@ -6988,6 +7014,7 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 { ; CHECK-NEXT: cvtsi2sd %rax, %xmm0 ; CHECK-NEXT: movsd %xmm0, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: movapd %xmm2, %xmm0 ; CHECK-NEXT: retq ; @@ -7096,6 +7123,7 @@ define <3 x double> @constrained_vector_uitofp_v3f64_v3i64(<3 x i64> %x) #0 { ; CHECK-NEXT: addpd %xmm4, %xmm2 ; CHECK-NEXT: movlpd %xmm2, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: fldl -{{[0-9]+}}(%rsp) +; CHECK-NEXT: wait ; CHECK-NEXT: retq ; ; AVX1-LABEL: constrained_vector_uitofp_v3f64_v3i64: