From 48abac82b808315d387185bb2e44688add679073 Mon Sep 17 00:00:00 2001
From: Quentin Colombet
Date: Sat, 17 Feb 2018 03:05:33 +0000
Subject: [PATCH] Revert "[MachineCopyPropagation] Extend pass to do COPY
 source forwarding"

This reverts commit r323991.

This commit breaks targets that don't model all the register
constraints in TableGen. So far the workaround was to set the
hasExtraXXXRegAllocReq flags, but that has proved insufficient to
cover all the cases. For instance, when mutating an instruction (like
in the lowering of COPYs), the isRenamable flag is not properly
updated. The same problem will happen when attaching a machine operand
from one instruction to another.

Geoff Berry is working on a fix in https://reviews.llvm.org/D43042.

llvm-svn: 325421
---
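
The failure mode, in brief: copy forwarding trusts MachineOperand::isRenamable()
to decide when a physical register use may be rewritten, but nothing forces
target code to keep that flag accurate when it rewrites an instruction. A
minimal sketch of the kind of code that trips this, assuming a hypothetical
opcode TargetOpc whose register constraints are not modeled in TableGen:

    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    using namespace llvm;

    // Hypothetical in-place lowering of a COPY. setDesc() swaps the opcode,
    // but each operand keeps the isRenamable flag it was created with, so a
    // later MachineCopyPropagation run may still rename operands that
    // TargetOpc cannot actually tolerate.
    static void lowerCopyInPlace(MachineInstr &MI, const TargetInstrInfo &TII,
                                 unsigned TargetOpc) {
      MI.setDesc(TII.get(TargetOpc));
      // The missing step, and the crux of the bug:
      // MI.getOperand(0).setIsRenamable(false);
    }

D43042 above is the in-progress fix for keeping the flag conservative in
these situations.
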
 llvm/lib/CodeGen/MachineCopyPropagation.cpp | 207 +--------
 llvm/lib/CodeGen/TargetPassConfig.cpp | 4 -
 .../CodeGen/AArch64/aarch64-fold-lslfast.ll | 9 +-
 .../CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll | 16 +-
 .../AArch64/arm64-zero-cycle-regmov.ll | 6 +-
 llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll | 3 +-
 llvm/test/CodeGen/AArch64/copyprop.mir | 104 -----
 llvm/test/CodeGen/AArch64/f16-instructions.ll | 2 +-
 llvm/test/CodeGen/AArch64/flags-multiuse.ll | 5 +-
 llvm/test/CodeGen/AArch64/ldst-opt.ll | 2 +-
 .../CodeGen/AArch64/merge-store-dependency.ll | 2 +-
 llvm/test/CodeGen/AArch64/neg-imm.ll | 4 +-
 llvm/test/CodeGen/AArch64/swifterror.ll | 6 +-
 .../AMDGPU/callee-special-input-sgprs.ll | 12 +-
 llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir | 2 +-
 llvm/test/CodeGen/AMDGPU/multilevel-break.ll | 2 +-
 llvm/test/CodeGen/AMDGPU/ret.ll | 18 +-
 llvm/test/CodeGen/ARM/atomic-op.ll | 6 +-
 llvm/test/CodeGen/ARM/intrinsics-overflow.ll | 2 +-
 llvm/test/CodeGen/ARM/swifterror.ll | 9 +-
 llvm/test/CodeGen/Mips/llvm-ir/ashr.ll | 2 +-
 llvm/test/CodeGen/Mips/llvm-ir/lshr.ll | 10 +-
 llvm/test/CodeGen/Mips/llvm-ir/shl.ll | 4 +-
 llvm/test/CodeGen/Mips/llvm-ir/sub.ll | 2 +-
 .../PowerPC/MCSE-caller-preserved-reg.ll | 2 +-
 llvm/test/CodeGen/PowerPC/fma-mutate.ll | 3 +-
 llvm/test/CodeGen/PowerPC/gpr-vsr-spill.ll | 2 +-
 llvm/test/CodeGen/PowerPC/licm-remat.ll | 4 +-
 .../CodeGen/PowerPC/opt-li-add-to-addi.ll | 2 +-
 llvm/test/CodeGen/PowerPC/tail-dup-layout.ll | 2 +-
 llvm/test/CodeGen/SPARC/32abi.ll | 4 +-
 llvm/test/CodeGen/SPARC/atomics.ll | 5 +-
 llvm/test/CodeGen/SystemZ/vec-sub-01.ll | 12 +-
 llvm/test/CodeGen/Thumb/pr35836.ll | 4 +-
 .../CodeGen/Thumb/thumb-shrink-wrapping.ll | 2 +-
 .../CodeGen/X86/2006-03-01-InstrSchedBug.ll | 2 +-
 llvm/test/CodeGen/X86/arg-copy-elide.ll | 2 +-
 llvm/test/CodeGen/X86/avx-load-store.ll | 6 +-
 llvm/test/CodeGen/X86/avx512-bugfix-25270.ll | 4 +-
 llvm/test/CodeGen/X86/avx512-calling-conv.ll | 2 +-
 llvm/test/CodeGen/X86/avx512-intel-ocl.ll | 2 +-
 .../test/CodeGen/X86/avx512-regcall-NoMask.ll | 12 +-
 llvm/test/CodeGen/X86/buildvec-insertvec.ll | 2 +-
 llvm/test/CodeGen/X86/combine-fcopysign.ll | 8 +-
 llvm/test/CodeGen/X86/combine-shl.ll | 2 +-
 llvm/test/CodeGen/X86/complex-fastmath.ll | 10 +-
 llvm/test/CodeGen/X86/divide-by-constant.ll | 2 +-
 llvm/test/CodeGen/X86/fixup-sfb.ll | 22 +-
 llvm/test/CodeGen/X86/fmaxnum.ll | 8 +-
 llvm/test/CodeGen/X86/fmf-flags.ll | 2 +-
 llvm/test/CodeGen/X86/fminnum.ll | 8 +-
 llvm/test/CodeGen/X86/fp128-i128.ll | 4 +-
 llvm/test/CodeGen/X86/h-registers-1.ll | 6 +-
 llvm/test/CodeGen/X86/haddsub-2.ll | 12 +-
 llvm/test/CodeGen/X86/haddsub-3.ll | 6 +-
 llvm/test/CodeGen/X86/haddsub-undef.ll | 4 +-
 llvm/test/CodeGen/X86/half.ll | 6 +-
 .../CodeGen/X86/horizontal-reduce-smax.ll | 4 +-
 .../CodeGen/X86/horizontal-reduce-smin.ll | 4 +-
 .../CodeGen/X86/horizontal-reduce-umax.ll | 8 +-
 .../CodeGen/X86/horizontal-reduce-umin.ll | 12 +-
 llvm/test/CodeGen/X86/inline-asm-fpstack.ll | 1 -
 llvm/test/CodeGen/X86/ipra-local-linkage.ll | 2 +-
 llvm/test/CodeGen/X86/localescape.ll | 2 +-
 llvm/test/CodeGen/X86/machine-cp.ll | 4 +-
 llvm/test/CodeGen/X86/mul-i1024.ll | 165 ++++----
 llvm/test/CodeGen/X86/mul-i256.ll | 10 +-
 llvm/test/CodeGen/X86/mul-i512.ll | 45 +-
 llvm/test/CodeGen/X86/mul128.ll | 2 +-
 llvm/test/CodeGen/X86/mulvi32.ll | 2 +-
 llvm/test/CodeGen/X86/musttail-varargs.ll | 6 +-
 llvm/test/CodeGen/X86/pmul.ll | 48 +--
 llvm/test/CodeGen/X86/powi.ll | 2 +-
 llvm/test/CodeGen/X86/pr11334.ll | 2 +-
 llvm/test/CodeGen/X86/pr29112.ll | 8 +-
 llvm/test/CodeGen/X86/pr34080-2.ll | 4 +-
 .../test/CodeGen/X86/required-vector-width.ll | 10 +-
 llvm/test/CodeGen/X86/retpoline-external.ll | 4 +-
 llvm/test/CodeGen/X86/retpoline.ll | 4 +-
 llvm/test/CodeGen/X86/sad.ll | 2 +-
 llvm/test/CodeGen/X86/safestack.ll | 2 +-
 llvm/test/CodeGen/X86/safestack_inline.ll | 2 +-
 llvm/test/CodeGen/X86/scalar_widen_div.ll | 4 +-
 llvm/test/CodeGen/X86/select.ll | 3 +-
 llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll | 2 +-
 llvm/test/CodeGen/X86/slow-pmulld.ll | 4 +-
 llvm/test/CodeGen/X86/sqrt-fastmath.ll | 8 +-
 llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll | 12 +-
 llvm/test/CodeGen/X86/sse1.ll | 4 +-
 llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll | 4 +-
 llvm/test/CodeGen/X86/statepoint-live-in.ll | 2 +-
 .../CodeGen/X86/statepoint-stack-usage.ll | 6 +-
 llvm/test/CodeGen/X86/vec_fp_to_int.ll | 26 +-
 llvm/test/CodeGen/X86/vec_int_to_fp.ll | 4 +-
 llvm/test/CodeGen/X86/vec_minmax_uint.ll | 4 +-
 llvm/test/CodeGen/X86/vec_shift4.ll | 8 +-
 llvm/test/CodeGen/X86/vector-blend.ll | 2 +-
 llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll | 10 +-
 llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll | 2 +-
 llvm/test/CodeGen/X86/vector-mul.ll | 8 +-
 llvm/test/CodeGen/X86/vector-rotate-128.ll | 16 +-
 llvm/test/CodeGen/X86/vector-sext.ll | 12 +-
 .../test/CodeGen/X86/vector-shift-ashr-128.ll | 2 +-
 .../test/CodeGen/X86/vector-shift-lshr-128.ll | 8 +-
 llvm/test/CodeGen/X86/vector-shift-shl-128.ll | 14 +-
 .../CodeGen/X86/vector-shuffle-combining.ll | 2 +-
 llvm/test/CodeGen/X86/vector-trunc-math.ll | 2 +-
 llvm/test/CodeGen/X86/vector-trunc-packus.ll | 12 +-
 llvm/test/CodeGen/X86/vector-zext.ll | 14 +-
 llvm/test/CodeGen/X86/vselect-minmax.ll | 392 +++---------
 llvm/test/CodeGen/X86/widen_conv-3.ll | 2 +-
 llvm/test/CodeGen/X86/widen_conv-4.ll | 4 +-
 llvm/test/CodeGen/X86/win64_frame.ll | 4 +-
 .../CodeGen/X86/x86-interleaved-access.ll | 2 +-
 .../CodeGen/X86/x86-shrink-wrap-unwind.ll | 6 +-
 llvm/test/CodeGen/X86/x86-shrink-wrapping.ll | 4 +-
 llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll | 2 +-
 llvm/test/DebugInfo/X86/spill-nospill.ll | 2 +-
 118 files changed, 508 insertions(+), 1073 deletions(-)
 delete mode 100644 llvm/test/CodeGen/AArch64/copyprop.mir

diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 4c1717e1ec3c..fcec05adc732 100644
--- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -9,35 +9,6 @@
 //
 // This is an extremely simple MachineInstr-level copy propagation pass.
 //
-// This pass forwards the source of COPYs to the users of their destinations
-// when doing so is legal. For example:
-//
-//   %reg1 = COPY %reg0
-//   ...
-//   ... = OP %reg1
-//
-// If
-//   - %reg0 has not been clobbered by the time of the use of %reg1
-//   - the register class constraints are satisfied
-//   - the COPY def is the only value that reaches OP
-// then this pass replaces the above with:
-//
-//   %reg1 = COPY %reg0
-//   ...
-//   ... = OP %reg0
-//
-// This pass also removes some redundant COPYs. For example:
-//
-//    %R1 = COPY %R0
-//    ... // No clobber of %R1
-//    %R0 = COPY %R1 <<< Removed
-//
-// or
-//
-//    %R1 = COPY %R0
-//    ... // No clobber of %R0
-//    %R1 = COPY %R0 <<< Removed
-//
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/DenseMap.h"
@@ -52,13 +23,11 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/MC/MCRegisterInfo.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Support/DebugCounter.h"
 #include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <iterator>
@@ -68,9 +37,6 @@ using namespace llvm;
 
 #define DEBUG_TYPE "machine-cp"
 
 STATISTIC(NumDeletes, "Number of dead copies deleted");
-STATISTIC(NumCopyForwards, "Number of copy uses forwarded");
-DEBUG_COUNTER(FwdCounter, "machine-cp-fwd",
-              "Controls which register COPYs are forwarded");
 
 namespace {
@@ -107,10 +73,6 @@ using Reg2MIMap = DenseMap<unsigned, MachineInstr *>;
   void ReadRegister(unsigned Reg);
   void CopyPropagateBlock(MachineBasicBlock &MBB);
   bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
-  void forwardUses(MachineInstr &MI);
-  bool isForwardableRegClassCopy(const MachineInstr &Copy,
-                                 const MachineInstr &UseI, unsigned UseIdx);
-  bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use);
 
   /// Candidates for deletion.
   SmallSetVector<MachineInstr *, 8> MaybeDeadCopies;
@@ -246,152 +208,6 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
   return true;
 }
 
-/// Decide whether we should forward the source of \param Copy to its use in
-/// \param UseI based on the physical register class constraints of the opcode
-/// and avoiding introducing more cross-class COPYs.
-bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
-                                                       const MachineInstr &UseI,
-                                                       unsigned UseIdx) {
-
-  unsigned CopySrcReg = Copy.getOperand(1).getReg();
-
-  // If the new register meets the opcode register constraints, then allow
-  // forwarding.
-  if (const TargetRegisterClass *URC =
-          UseI.getRegClassConstraint(UseIdx, TII, TRI))
-    return URC->contains(CopySrcReg);
-
-  if (!UseI.isCopy())
-    return false;
-
-  /// COPYs don't have register class constraints, so if the user instruction
-  /// is a COPY, we just try to avoid introducing additional cross-class
-  /// COPYs. For example:
-  ///
-  ///   RegClassA = COPY RegClassB // Copy parameter
-  ///   ...
-  ///   RegClassB = COPY RegClassA // UseI parameter
-  ///
-  /// which after forwarding becomes
-  ///
-  ///   RegClassA = COPY RegClassB
-  ///   ...
-  ///   RegClassB = COPY RegClassB
-  ///
-  /// so we have reduced the number of cross-class COPYs and potentially
-  /// introduced a nop COPY that can be removed.
-  const TargetRegisterClass *UseDstRC =
-      TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg());
-
-  const TargetRegisterClass *SuperRC = UseDstRC;
-  for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses();
-       SuperRC; SuperRC = *SuperRCI++)
-    if (SuperRC->contains(CopySrcReg))
-      return true;
-
-  return false;
-}
-
-/// Check that \p MI does not have implicit uses that overlap with it's \p Use
-/// operand (the register being replaced), since these can sometimes be
-/// implicitly tied to other operands. For example, on AMDGPU:
-///
-/// V_MOVRELS_B32_e32 %VGPR2, %M0, %EXEC, %VGPR2_VGPR3_VGPR4_VGPR5
-///
-/// the %VGPR2 is implicitly tied to the larger reg operand, but we have no
-/// way of knowing we need to update the latter when updating the former.
-bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI,
-                                                const MachineOperand &Use) {
-  for (const MachineOperand &MIUse : MI.uses())
-    if (&MIUse != &Use && MIUse.isReg() && MIUse.isImplicit() &&
-        MIUse.isUse() && TRI->regsOverlap(Use.getReg(), MIUse.getReg()))
-      return true;
-
-  return false;
-}
-
-/// Look for available copies whose destination register is used by \p MI and
-/// replace the use in \p MI with the copy's source register.
-void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
-  if (AvailCopyMap.empty())
-    return;
-
-  // Look for non-tied explicit vreg uses that have an active COPY
-  // instruction that defines the physical register allocated to them.
-  // Replace the vreg with the source of the active COPY.
-  for (unsigned OpIdx = 0, OpEnd = MI.getNumOperands(); OpIdx < OpEnd;
-       ++OpIdx) {
-    MachineOperand &MOUse = MI.getOperand(OpIdx);
-    // Don't forward into undef use operands since doing so can cause problems
-    // with the machine verifier, since it doesn't treat undef reads as reads,
-    // so we can end up with a live range that ends on an undef read, leading to
-    // an error that the live range doesn't end on a read of the live range
-    // register.
-    if (!MOUse.isReg() || MOUse.isTied() || MOUse.isUndef() || MOUse.isDef() ||
-        MOUse.isImplicit())
-      continue;
-
-    if (!MOUse.getReg())
-      continue;
-
-    // Check that the register is marked 'renamable' so we know it is safe to
-    // rename it without violating any constraints that aren't expressed in the
-    // IR (e.g. ABI or opcode requirements).
-    if (!MOUse.isRenamable())
-      continue;
-
-    auto CI = AvailCopyMap.find(MOUse.getReg());
-    if (CI == AvailCopyMap.end())
-      continue;
-
-    MachineInstr &Copy = *CI->second;
-    unsigned CopyDstReg = Copy.getOperand(0).getReg();
-    const MachineOperand &CopySrc = Copy.getOperand(1);
-    unsigned CopySrcReg = CopySrc.getReg();
-
-    // FIXME: Don't handle partial uses of wider COPYs yet.
-    if (MOUse.getReg() != CopyDstReg) {
-      DEBUG(dbgs() << "MCP: FIXME! Not forwarding COPY to sub-register use:\n "
-                   << MI);
-      continue;
-    }
-
-    // Don't forward COPYs of reserved regs unless they are constant.
-    if (MRI->isReserved(CopySrcReg) && !MRI->isConstantPhysReg(CopySrcReg))
-      continue;
-
-    if (!isForwardableRegClassCopy(Copy, MI, OpIdx))
-      continue;
-
-    if (hasImplicitOverlap(MI, MOUse))
-      continue;
-
-    if (!DebugCounter::shouldExecute(FwdCounter)) {
-      DEBUG(dbgs() << "MCP: Skipping forwarding due to debug counter:\n "
-                   << MI);
-      continue;
-    }
-
-    DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI)
-                 << "\n     with " << printReg(CopySrcReg, TRI) << "\n     in "
-                 << MI << "     from " << Copy);
-
-    MOUse.setReg(CopySrcReg);
-    if (!CopySrc.isRenamable())
-      MOUse.setIsRenamable(false);
-
-    DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n");
-
-    // Clear kill markers that may have been invalidated.
-    for (MachineInstr &KMI :
-         make_range(Copy.getIterator(), std::next(MI.getIterator())))
-      KMI.clearRegisterKills(CopySrcReg, TRI);
-
-    ++NumCopyForwards;
-    Changed = true;
-  }
-}
-
 void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
   DEBUG(dbgs() << "MCP: CopyPropagateBlock " << MBB.getName() << "\n");
 
@@ -425,11 +241,6 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
       if (eraseIfRedundant(*MI, Def, Src) || eraseIfRedundant(*MI, Src, Def))
         continue;
 
-      forwardUses(*MI);
-
-      // Src may have been changed by forwardUses()
-      Src = MI->getOperand(1).getReg();
-
       // If Src is defined by a previous copy, the previous copy cannot be
      // eliminated.
       ReadRegister(Src);
@@ -481,20 +292,6 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
       continue;
     }
 
-    // Clobber any earlyclobber regs first.
-    for (const MachineOperand &MO : MI->operands())
-      if (MO.isReg() && MO.isEarlyClobber()) {
-        unsigned Reg = MO.getReg();
-        // If we have a tied earlyclobber, that means it is also read by this
-        // instruction, so we need to make sure we don't remove it as dead
-        // later.
-        if (MO.isTied())
-          ReadRegister(Reg);
-        ClobberRegister(Reg);
-      }
-
-    forwardUses(*MI);
-
     // Not a copy.
     SmallVector<unsigned, 2> Defs;
     const MachineOperand *RegMask = nullptr;
@@ -510,7 +307,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
       assert(!TargetRegisterInfo::isVirtualRegister(Reg) &&
             "MachineCopyPropagation should be run after register allocation!");
 
-      if (MO.isDef() && !MO.isEarlyClobber()) {
+      if (MO.isDef()) {
         Defs.push_back(Reg);
         continue;
       } else if (MO.readsReg())
@@ -567,8 +364,6 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
   // since we don't want to trust live-in lists.
   if (MBB.succ_empty()) {
     for (MachineInstr *MaybeDead : MaybeDeadCopies) {
-      DEBUG(dbgs() << "MCP: Removing copy due to no live-out succ: ";
-            MaybeDead->dump());
       assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
       MaybeDead->eraseFromParent();
       Changed = true;
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 84597570c16e..962836398e1d 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1080,10 +1080,6 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
     // kill markers.
     addPass(&StackSlotColoringID);
 
-    // Copy propagate to forward register uses and try to eliminate COPYs that
-    // were not coalesced.
-    addPass(&MachineCopyPropagationID);
-
     // Run post-ra machine LICM to hoist reloads / remats.
     //
     // FIXME: can this move into MachineLateOptimization?
diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
index 55ddaf8b65f1..0dfe04b664d0 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll
@@ -9,8 +9,7 @@ define i16 @halfword(%struct.a* %ctx, i32 %xor72) nounwind {
 ; CHECK-LABEL: halfword:
 ; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
 ; CHECK: ldrh [[REG1:w[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #1]
-; CHECK: mov [[REG3:x[0-9]+]], [[REG2]]
-; CHECK: strh [[REG1]], [{{.*}}[[REG3]], [[REG]], lsl #1]
+; CHECK: strh [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #1]
   %shr81 = lshr i32 %xor72, 9
   %conv82 = zext i32 %shr81 to i64
   %idxprom83 = and i64 %conv82, 255
@@ -25,8 +24,7 @@ define i32 @word(%struct.b* %ctx, i32 %xor72) nounwind {
 ; CHECK-LABEL: word:
 ; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
 ; CHECK: ldr [[REG1:w[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #2]
-; CHECK: mov [[REG3:x[0-9]+]], [[REG2]]
-; CHECK: str [[REG1]], [{{.*}}[[REG3]], [[REG]], lsl #2]
+; CHECK: str [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #2]
   %shr81 = lshr i32 %xor72, 9
   %conv82 = zext i32 %shr81 to i64
   %idxprom83 = and i64 %conv82, 255
@@ -41,8 +39,7 @@ define i64 @doubleword(%struct.c* %ctx, i32 %xor72) nounwind {
 ; CHECK-LABEL: doubleword:
 ; CHECK: ubfx [[REG:x[0-9]+]], x1, #9, #8
 ; CHECK: ldr [[REG1:x[0-9]+]], [{{.*}}[[REG2:x[0-9]+]], [[REG]], lsl #3]
-; CHECK: mov [[REG3:x[0-9]+]], [[REG2]]
-; CHECK: str [[REG1]], [{{.*}}[[REG3]], [[REG]], lsl #3]
+; CHECK: str [[REG1]], [{{.*}}[[REG2]], [[REG]], lsl #3]
   %shr81 = lshr i32 %xor72, 9
   %conv82 = zext i32 %shr81 to i64
   %idxprom83 = and i64 %conv82, 255
diff --git a/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll b/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
index 949de1a7c637..72e5ec6b89b5 100644
--- a/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll
@@ -8,9 +8,15 @@ define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
 ; CHECK: add.2d v[[REG:[0-9]+]], v0, v1
 ; CHECK: add d[[REG3:[0-9]+]], d[[REG]], d1
 ; CHECK: sub d[[REG2:[0-9]+]], d[[REG]], d1
-; CHECK-NOT: fmov
+; Without advanced copy optimization, we end up with cross register
+; banks copies that cannot be coalesced.
+; CHECK-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
+; With advanced copy optimization, we end up with just one copy
+; to insert the computed high part into the V register.
+; CHECK-OPT-NOT: fmov
 ; CHECK: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
-; CHECK-NOT: fmov
+; CHECK-NOOPT: fmov d0, [[COPY_REG3]]
+; CHECK-OPT-NOT: fmov
 ; CHECK: mov.d v0[1], [[COPY_REG2]]
 ; CHECK-NEXT: ret
 ;
@@ -18,9 +24,11 @@ define <2 x i64> @bar(<2 x i64> %a, <2 x i64> %b) nounwind readnone {
 ; GENERIC: add v[[REG:[0-9]+]].2d, v0.2d, v1.2d
 ; GENERIC: add d[[REG3:[0-9]+]], d[[REG]], d1
 ; GENERIC: sub d[[REG2:[0-9]+]], d[[REG]], d1
-; GENERIC-NOT: fmov
+; GENERIC-NOOPT: fmov [[COPY_REG3:x[0-9]+]], d[[REG3]]
+; GENERIC-OPT-NOT: fmov
 ; GENERIC: fmov [[COPY_REG2:x[0-9]+]], d[[REG2]]
-; GENERIC-NOT: fmov
+; GENERIC-NOOPT: fmov d0, [[COPY_REG3]]
+; GENERIC-OPT-NOT: fmov
 ; GENERIC: mov v0.d[1], [[COPY_REG2]]
 ; GENERIC-NEXT: ret
   %add = add <2 x i64> %a, %b
diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll
index 60a62030e44b..c56d607aa812 100644
--- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-regmov.ll
@@ -4,10 +4,8 @@ define i32 @t(i32 %a, i32 %b, i32 %c, i32 %d) nounwind ssp {
 entry:
 ; CHECK-LABEL: t:
-; CHECK: mov [[REG2:x[0-9]+]], x3
-; CHECK: mov [[REG1:x[0-9]+]], x2
-; CHECK: mov x0, x2
-; CHECK: mov x1, x3
+; CHECK: mov x0, [[REG1:x[0-9]+]]
+; CHECK: mov x1, [[REG2:x[0-9]+]]
 ; CHECK: bl _foo
 ; CHECK: mov x0, [[REG1]]
 ; CHECK: mov x1, [[REG2]]
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
index 5ff3ddfe09a4..da0f7073acef 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -45,7 +45,8 @@ define i1 @test_return_bool(i8* %value, i8 %oldValue, i8 %newValue) {
 
 ; CHECK: [[FAILED]]:
 ; CHECK-NOT: cmp {{w[0-9]+}}, {{w[0-9]+}}
-; CHECK: eor w0, wzr, #0x1
+; CHECK: mov [[TMP:w[0-9]+]], wzr
+; CHECK: eor w0, [[TMP]], #0x1
 ; CHECK: ret
 
   %pair = cmpxchg i8* %value, i8 %oldValue, i8 %newValue acq_rel monotonic
diff --git a/llvm/test/CodeGen/AArch64/copyprop.mir b/llvm/test/CodeGen/AArch64/copyprop.mir
deleted file mode 100644
index e23002c56907..000000000000
--- a/llvm/test/CodeGen/AArch64/copyprop.mir
+++ /dev/null
@@ -1,104 +0,0 @@
-# RUN: llc -mtriple=aarch64-linux-gnu -run-pass machine-cp -o - %s | FileCheck %s
-# Tests for MachineCopyPropagation copy forwarding.
----
-# Simple forwarding.
-# CHECK-LABEL: name: test1
-# CHECK: $x0 = SUBXri $x0, 1, 0
-name: test1
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $x0
-    renamable $x1 = COPY $x0
-    $x0 = SUBXri renamable $x1, 1, 0
-...
----
-# Don't forward if not renamable.
-# CHECK-LABEL: name: test2
-# CHECK: $x0 = SUBXri $x1, 1, 0
-name: test2
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $x0
-    $x1 = COPY $x0
-    $x0 = SUBXri $x1, 1, 0
-...
----
-# Don't forward reserved non-constant reg values.
-# CHECK-LABEL: name: test4
-# CHECK: $x0 = SUBXri renamable $x1, 1, 0
-name: test4
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $x0
-    $sp = SUBXri $sp, 16, 0
-    renamable $x1 = COPY $sp
-    $x0 = SUBXri renamable $x1, 1, 0
-    $sp = ADDXri $sp, 16, 0
-...
----
-# Don't violate opcode constraints when forwarding.
-# CHECK-LABEL: name: test5
-# CHECK: $x0 = SUBXri renamable $x1, 1, 0
-name: test5
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $x0
-    renamable $x1 = COPY $xzr
-    $x0 = SUBXri renamable $x1, 1, 0
-...
----
-# Test cross-class COPY forwarding.
-# CHECK-LABEL: name: test6
-# CHECK: $x2 = COPY $x0
-name: test6
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $x0
-    renamable $d1 = COPY $x0
-    $x2 = COPY renamable $d1
-    RET_ReallyLR implicit $x2
-...
----
-# Don't forward if there are overlapping implicit operands.
-# CHECK-LABEL: name: test7
-# CHECK: $w0 = SUBWri killed renamable $w1, 1, 0
-name: test7
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $w0
-    renamable $w1 = COPY $w0
-    $w0 = SUBWri killed renamable $w1, 1, 0, implicit killed $x1
-...
----
-# Check that kill flags are cleared.
-# CHECK-LABEL: name: test8
-# CHECK: $x2 = ADDXri $x0, 1, 0
-# CHECK: $x0 = SUBXri $x0, 1, 0
-name: test8
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $x0
-    renamable $x1 = COPY $x0
-    $x2 = ADDXri killed $x0, 1, 0
-    $x0 = SUBXri renamable $x1, 1, 0
-...
----
-# Don't forward if value is clobbered.
-# CHECK-LABEL: name: test9
-# CHECK: $x2 = SUBXri renamable $x1, 1, 0
-name: test9
-tracksRegLiveness: true
-body: |
-  bb.0:
-    liveins: $x0
-    renamable $x1 = COPY $x0
-    $x0 = ADDXri $x0, 1, 0
-    $x2 = SUBXri renamable $x1, 1, 0
-...
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index c6c279d7d213..06490e414e8b 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -489,7 +489,7 @@ else:
 
 ; CHECK-COMMON-LABEL: test_phi:
 ; CHECK-COMMON: mov x[[PTR:[0-9]+]], x0
-; CHECK-COMMON: ldr h[[AB:[0-9]+]], [x0]
+; CHECK-COMMON: ldr h[[AB:[0-9]+]], [x[[PTR]]]
 ; CHECK-COMMON: [[LOOP:LBB[0-9_]+]]:
 ; CHECK-COMMON: mov.16b v[[R:[0-9]+]], v[[AB]]
 ; CHECK-COMMON: ldr h[[AB]], [x[[PTR]]]
diff --git a/llvm/test/CodeGen/AArch64/flags-multiuse.ll b/llvm/test/CodeGen/AArch64/flags-multiuse.ll
index a13f7e1e34ac..0827fb8c9e8c 100644
--- a/llvm/test/CodeGen/AArch64/flags-multiuse.ll
+++ b/llvm/test/CodeGen/AArch64/flags-multiuse.ll
@@ -17,9 +17,6 @@ define i32 @test_multiflag(i32 %n, i32 %m, i32 %o) {
   %val = zext i1 %test to i32
 
 ; CHECK: cset {{[xw][0-9]+}}, ne
 
-; CHECK: mov [[RHSCOPY:w[0-9]+]], [[RHS]]
-; CHECK: mov [[LHSCOPY:w[0-9]+]], [[LHS]]
-
   store i32 %val, i32* @var
 
   call void @bar()
@@ -28,7 +25,7 @@ define i32 @test_multiflag(i32 %n, i32 %m, i32 %o) {
 ; Currently, the comparison is emitted again. An MSR/MRS pair would also be
 ; acceptable, but assuming the call preserves NZCV is not.
   br i1 %test, label %iftrue, label %iffalse
-; CHECK: cmp [[LHSCOPY]], [[RHSCOPY]]
+; CHECK: cmp [[LHS]], [[RHS]]
 ; CHECK: b.eq
 
 iftrue:
diff --git a/llvm/test/CodeGen/AArch64/ldst-opt.ll b/llvm/test/CodeGen/AArch64/ldst-opt.ll
index ae3f59ee8f5d..e416dcb0f16a 100644
--- a/llvm/test/CodeGen/AArch64/ldst-opt.ll
+++ b/llvm/test/CodeGen/AArch64/ldst-opt.ll
@@ -1671,7 +1671,7 @@ entry:
 ; CHECK-LABEL: bug34674:
 ; CHECK: // %entry
 ; CHECK-NEXT: mov [[ZREG:x[0-9]+]], xzr
-; CHECK-DAG: stp xzr, xzr, [x0]
+; CHECK-DAG: stp [[ZREG]], [[ZREG]], [x0]
 ; CHECK-DAG: add x{{[0-9]+}}, [[ZREG]], #1
 define i64 @bug34674(<2 x i64>* %p) {
 entry:
diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
index 3b68cbb8c2af..5bed63ef895f 100644
--- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
+++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -11,7 +11,7 @@ entry:
 ; A53: mov [[DATA:w[0-9]+]], w1
 ; A53: str q{{[0-9]+}}, {{.*}}
 ; A53: str q{{[0-9]+}}, {{.*}}
-; A53: str w1, {{.*}}
+; A53: str [[DATA]], {{.*}}
 
   %0 = bitcast %struct1* %fde to i8*
   tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 40, i1 false)
diff --git a/llvm/test/CodeGen/AArch64/neg-imm.ll b/llvm/test/CodeGen/AArch64/neg-imm.ll
index ee95f37c203c..46bded78cc59 100644
--- a/llvm/test/CodeGen/AArch64/neg-imm.ll
+++ b/llvm/test/CodeGen/AArch64/neg-imm.ll
@@ -7,8 +7,8 @@ declare void @foo(i32)
 define void @test(i32 %px) {
 ; CHECK_LABEL: test:
 ; CHECK_LABEL: %entry
-; CHECK: subs [[REG0:w[0-9]+]],
-; CHECK: csel {{w[0-9]+}}, wzr, [[REG0]]
+; CHECK: subs
+; CHECK-NEXT: csel
 entry:
   %sub = add nsw i32 %px, -1
   %cmp = icmp slt i32 %px, 1
diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll
index 00cf7e6f503a..881bdd474e53 100644
--- a/llvm/test/CodeGen/AArch64/swifterror.ll
+++ b/llvm/test/CodeGen/AArch64/swifterror.ll
@@ -41,7 +41,7 @@ define float @caller(i8* %error_ref) {
 ; CHECK-APPLE: mov x21, xzr
 ; CHECK-APPLE: bl {{.*}}foo
 ; CHECK-APPLE: mov x0, x21
-; CHECK-APPLE: cbnz x21
+; CHECK-APPLE: cbnz x0
 ; Access part of the error object and save it to error_ref
 ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
@@ -264,7 +264,7 @@ define float @caller3(i8* %error_ref) {
 ; CHECK-APPLE: mov x21, xzr
 ; CHECK-APPLE: bl {{.*}}foo_sret
 ; CHECK-APPLE: mov x0, x21
-; CHECK-APPLE: cbnz x21
+; CHECK-APPLE: cbnz x0
 ; Access part of the error object and save it to error_ref
 ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
@@ -358,7 +358,7 @@ define float @caller4(i8* %error_ref) {
 ; CHECK-APPLE: mov x21, xzr
 ; CHECK-APPLE: bl {{.*}}foo_vararg
 ; CHECK-APPLE: mov x0, x21
-; CHECK-APPLE: cbnz x21
+; CHECK-APPLE: cbnz x0
 ; Access part of the error object and save it to error_ref
 ; CHECK-APPLE: ldrb [[CODE:w[0-9]+]], [x0, #8]
 ; CHECK-APPLE: strb [[CODE]], [{{.*}}[[ID]]]
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
index 6a45dee857cc..8f9fa41df88d 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
@@ -547,16 +547,16 @@ define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
 ; GCN: s_mov_b32 s5, s32
 ; GCN: s_add_u32 s32, s32, 0x300
 
-; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14
-; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15
-; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-79][0-9]*]], s16
+; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-9]+]], s14
+; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-9]+]], s15
+; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-9]+]], s16
 ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[6:7]
 ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[8:9]
 ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[10:11]
 
-; GCN-DAG: s_mov_b32 s6, s14
-; GCN-DAG: s_mov_b32 s7, s15
-; GCN-DAG: s_mov_b32 s8, s16
+; GCN-DAG: s_mov_b32 s6, [[SAVE_X]]
+; GCN-DAG: s_mov_b32 s7, [[SAVE_Y]]
+; GCN-DAG: s_mov_b32 s8, [[SAVE_Z]]
 ; GCN: s_swappc_b64
 
 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
diff --git a/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir
index 66a238dff820..c53ff28b547b 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir
+++ b/llvm/test/CodeGen/AMDGPU/fix-vgpr-copies.mir
@@ -1,4 +1,4 @@
-# RUN: llc -march=amdgcn -start-after=greedy -disable-copyprop -stop-after=si-optimize-exec-masking -o - %s | FileCheck %s
+# RUN: llc -march=amdgcn -start-after=greedy -stop-after=si-optimize-exec-masking -o - %s | FileCheck %s
 # Check that we first do all vector instructions and only then change exec
 # CHECK-DAG: COPY $vgpr10_vgpr11
 # CHECK-DAG: COPY $vgpr12_vgpr13
diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
index 5917f87e19ad..8cc02d497098 100644
--- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
+++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll
@@ -78,7 +78,7 @@ ENDIF: ; preds = %LOOP
 
 ; Uses a copy intsead of an or
 ; GCN: s_mov_b64 [[COPY:s\[[0-9]+:[0-9]+\]]], [[BREAK_REG]]
-; GCN: s_or_b64 [[BREAK_REG]], exec, [[BREAK_REG]]
+; GCN: s_or_b64 [[BREAK_REG]], exec, [[COPY]]
 define amdgpu_kernel void @multi_if_break_loop(i32 %arg) #0 {
 bb:
   %id = call i32 @llvm.amdgcn.workitem.id.x()
diff --git a/llvm/test/CodeGen/AMDGPU/ret.ll b/llvm/test/CodeGen/AMDGPU/ret.ll
index e9007869b9fa..d587f6a3da2a 100644
--- a/llvm/test/CodeGen/AMDGPU/ret.ll
+++ b/llvm/test/CodeGen/AMDGPU/ret.ll
@@ -2,10 +2,10 @@
 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-LABEL: {{^}}vgpr:
-; GCN-DAG: v_mov_b32_e32 v1, v0
-; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
+; GCN: v_mov_b32_e32 v1, v0
+; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
+; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
 ; GCN: s_waitcnt expcnt(0)
-; GCN: v_add_f32_e32 v0, 1.0, v0
 ; GCN-NOT: s_endpgm
 define amdgpu_vs { float, float } @vgpr([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 bb:
@@ -204,13 +204,13 @@ bb:
 }
 
 ; GCN-LABEL: {{^}}both:
-; GCN-DAG: exp mrt0 v0, v0, v0, v0 done vm
-; GCN-DAG: v_mov_b32_e32 v1, v0
-; GCN-DAG: s_mov_b32 s1, s2
-; GCN: s_waitcnt expcnt(0)
-; GCN: v_add_f32_e32 v0, 1.0, v0
+; GCN: v_mov_b32_e32 v1, v0
+; GCN-DAG: exp mrt0 v1, v1, v1, v1 done vm
+; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
 ; GCN-DAG: s_add_i32 s0, s3, 2
-; GCN-DAG: s_mov_b32 s2, s3
+; GCN-DAG: s_mov_b32 s1, s2
+; GCN: s_mov_b32 s2, s3
+; GCN: s_waitcnt expcnt(0)
 ; GCN-NOT: s_endpgm
 define amdgpu_vs { float, i32, float, i32, i32 } @both([9 x <16 x i8>] addrspace(4)* byval %arg, i32 inreg %arg1, i32 inreg %arg2, float %arg3) #0 {
 bb:
diff --git a/llvm/test/CodeGen/ARM/atomic-op.ll b/llvm/test/CodeGen/ARM/atomic-op.ll
index 2337e835480d..644a7fbf8d9a 100644
--- a/llvm/test/CodeGen/ARM/atomic-op.ll
+++ b/llvm/test/CodeGen/ARM/atomic-op.ll
@@ -287,8 +287,7 @@ define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) {
   %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
   %oldval = extractvalue { i32, i1 } %pair, 0
 
-; CHECK-ARMV7: mov r[[ADDR:[0-9]+]], r0
-; CHECK-ARMV7: ldrex [[OLDVAL:r[0-9]+]], [r0]
+; CHECK-ARMV7: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
 ; CHECK-ARMV7: cmp [[OLDVAL]], r1
 ; CHECK-ARMV7: bne [[FAIL_BB:\.?LBB[0-9]+_[0-9]+]]
 ; CHECK-ARMV7: dmb ish
@@ -306,8 +305,7 @@ define i32 @test_cmpxchg_fail_order(i32 *%addr, i32 %desired, i32 %new) {
 ; CHECK-ARMV7: dmb ish
 ; CHECK-ARMV7: bx lr
 
-; CHECK-T2: mov r[[ADDR:[0-9]+]], r0
-; CHECK-T2: ldrex [[OLDVAL:r[0-9]+]], [r0]
+; CHECK-T2: ldrex [[OLDVAL:r[0-9]+]], [r[[ADDR:[0-9]+]]]
 ; CHECK-T2: cmp [[OLDVAL]], r1
 ; CHECK-T2: bne [[FAIL_BB:\.?LBB.*]]
 ; CHECK-T2: dmb ish
diff --git a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll
index 835be7e949d3..5f78b13c18d1 100644
--- a/llvm/test/CodeGen/ARM/intrinsics-overflow.ll
+++ b/llvm/test/CodeGen/ARM/intrinsics-overflow.ll
@@ -39,7 +39,7 @@ define i32 @sadd_overflow(i32 %a, i32 %b) #0 {
   ; ARM: mov pc, lr
 
   ; THUMBV6: mov r[[R2:[0-9]+]], r[[R0:[0-9]+]]
-  ; THUMBV6: adds r[[R3:[0-9]+]], r[[R0]], r[[R1:[0-9]+]]
+  ; THUMBV6: adds r[[R3:[0-9]+]], r[[R2]], r[[R1:[0-9]+]]
   ; THUMBV6: movs r[[R0]], #0
   ; THUMBV6: movs r[[R1]], #1
   ; THUMBV6: cmp r[[R3]], r[[R2]]
diff --git a/llvm/test/CodeGen/ARM/swifterror.ll b/llvm/test/CodeGen/ARM/swifterror.ll
index 8d1729df2ae0..db339604597a 100644
--- a/llvm/test/CodeGen/ARM/swifterror.ll
+++ b/llvm/test/CodeGen/ARM/swifterror.ll
@@ -40,7 +40,7 @@ define float @caller(i8* %error_ref) {
 ; CHECK-APPLE-DAG: mov r8, #0
 ; CHECK-APPLE: bl {{.*}}foo
 ; CHECK-APPLE: mov r0, r8
-; CHECK-APPLE: cmp r8, #0
+; CHECK-APPLE: cmp r0, #0
 ; Access part of the error object and save it to error_ref
 ; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8]
 ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]]
@@ -181,7 +181,8 @@ define float @foo_loop(%swift_error** swifterror %error_ptr_ref, i32 %cc, float
 ; CHECK-APPLE: beq
 ; CHECK-APPLE: mov r0, #16
 ; CHECK-APPLE: malloc
-; CHECK-APPLE: strb r{{.*}}, [r0, #8]
+; CHECK-APPLE: mov r8, r0
+; CHECK-APPLE: strb r{{.*}}, [r8, #8]
 ; CHECK-APPLE: ble
 
 ; CHECK-O0-LABEL: foo_loop:
@@ -265,7 +266,7 @@ define float @caller3(i8* %error_ref) {
 ; CHECK-APPLE: mov r8, #0
 ; CHECK-APPLE: bl {{.*}}foo_sret
 ; CHECK-APPLE: mov r0, r8
-; CHECK-APPLE: cmp r8, #0
+; CHECK-APPLE: cmp r0, #0
 ; Access part of the error object and save it to error_ref
 ; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8]
 ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]]
@@ -346,7 +347,7 @@ define float @caller4(i8* %error_ref) {
 ; CHECK-APPLE: mov r8, #0
 ; CHECK-APPLE: bl {{.*}}foo_vararg
 ; CHECK-APPLE: mov r0, r8
-; CHECK-APPLE: cmp r8, #0
+; CHECK-APPLE: cmp r0, #0
 ; Access part of the error object and save it to error_ref
 ; CHECK-APPLE: ldrbeq [[CODE:r[0-9]+]], [r0, #8]
 ; CHECK-APPLE: strbeq [[CODE]], [{{.*}}[[ID]]]
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll b/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll
index 140f545f239a..5cbf51e3882a 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll
@@ -800,7 +800,7 @@ define signext i128 @ashr_i128(i128 signext %a, i128 signext %b) {
 ; MMR3-NEXT: sw $5, 36($sp) # 4-byte Folded Spill
 ; MMR3-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
 ; MMR3-NEXT: lw $16, 76($sp)
-; MMR3-NEXT: srlv $4, $7, $16
+; MMR3-NEXT: srlv $4, $8, $16
 ; MMR3-NEXT: not16 $3, $16
 ; MMR3-NEXT: sw $3, 24($sp) # 4-byte Folded Spill
 ; MMR3-NEXT: sll16 $2, $6, 1
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll b/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll
index 79382e0df35a..d9756ddcf31d 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll
@@ -828,7 +828,7 @@ define signext i128 @lshr_i128(i128 signext %a, i128 signext %b) {
 ; MMR3-NEXT: move $17, $5
 ; MMR3-NEXT: sw $4, 8($sp) # 4-byte Folded Spill
 ; MMR3-NEXT: lw $16, 76($sp)
-; MMR3-NEXT: srlv $7, $7, $16
+; MMR3-NEXT: srlv $7, $8, $16
 ; MMR3-NEXT: not16 $3, $16
 ; MMR3-NEXT: sw $3, 24($sp) # 4-byte Folded Spill
 ; MMR3-NEXT: sll16 $2, $6, 1
@@ -919,14 +919,14 @@ define signext i128 @lshr_i128(i128 signext %a, i128 signext %b) {
 ; MMR6-NEXT: not16 $5, $3
 ; MMR6-NEXT: sw $5, 12($sp) # 4-byte Folded Spill
 ; MMR6-NEXT: move $17, $6
-; MMR6-NEXT: sw $6, 16($sp) # 4-byte Folded Spill
-; MMR6-NEXT: sll16 $6, $6, 1
+; MMR6-NEXT: sw $17, 16($sp) # 4-byte Folded Spill
+; MMR6-NEXT: sll16 $6, $17, 1
 ; MMR6-NEXT: sllv $6, $6, $5
 ; MMR6-NEXT: or $8, $6, $2
 ; MMR6-NEXT: addiu $5, $3, -64
 ; MMR6-NEXT: srlv $9, $7, $5
 ; MMR6-NEXT: move $6, $4
-; MMR6-NEXT: sll16 $2, $4, 1
+; MMR6-NEXT: sll16 $2, $6, 1
 ; MMR6-NEXT: sw $2, 8($sp) # 4-byte Folded Spill
 ; MMR6-NEXT: not16 $16, $5
 ; MMR6-NEXT: sllv $10, $2, $16
@@ -948,7 +948,7 @@ define signext i128 @lshr_i128(i128 signext %a, i128 signext %b) {
 ; MMR6-NEXT: selnez $11, $12, $4
 ; MMR6-NEXT: sllv $12, $6, $2
 ; MMR6-NEXT: move $7, $6
-; MMR6-NEXT: sw $6, 4($sp) # 4-byte Folded Spill
+; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill
 ; MMR6-NEXT: not16 $2, $2
 ; MMR6-NEXT: srl16 $6, $17, 1
 ; MMR6-NEXT: srlv $2, $6, $2
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/shl.ll b/llvm/test/CodeGen/Mips/llvm-ir/shl.ll
index 8c6138e0eba5..7d90b0ec8d09 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/shl.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/shl.ll
@@ -857,7 +857,7 @@ define signext i128 @shl_i128(i128 signext %a, i128 signext %b) {
 ; MMR3-NEXT: sw $5, 32($sp) # 4-byte Folded Spill
 ; MMR3-NEXT: move $1, $4
 ; MMR3-NEXT: lw $16, 76($sp)
-; MMR3-NEXT: sllv $2, $4, $16
+; MMR3-NEXT: sllv $2, $1, $16
 ; MMR3-NEXT: not16 $4, $16
 ; MMR3-NEXT: sw $4, 24($sp) # 4-byte Folded Spill
 ; MMR3-NEXT: srl16 $3, $5, 1
@@ -945,7 +945,7 @@ define signext i128 @shl_i128(i128 signext %a, i128 signext %b) {
 ; MMR6-NEXT: .cfi_offset 16, -8
 ; MMR6-NEXT: move $11, $4
 ; MMR6-NEXT: lw $3, 44($sp)
-; MMR6-NEXT: sllv $1, $4, $3
+; MMR6-NEXT: sllv $1, $11, $3
 ; MMR6-NEXT: not16 $2, $3
 ; MMR6-NEXT: sw $2, 4($sp) # 4-byte Folded Spill
 ; MMR6-NEXT: srl16 $16, $5, 1
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/sub.ll b/llvm/test/CodeGen/Mips/llvm-ir/sub.ll
index d839a6e4c88c..d06170f1db15 100644
--- a/llvm/test/CodeGen/Mips/llvm-ir/sub.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/sub.ll
@@ -163,7 +163,7 @@ entry:
 ; MMR3: subu16 $5, $[[T19]], $[[T20]]
 
 ; MMR6: move $[[T0:[0-9]+]], $7
-; MMR6: sw $7, 8($sp)
+; MMR6: sw $[[T0]], 8($sp)
 ; MMR6: move $[[T1:[0-9]+]], $5
 ; MMR6: sw $4, 12($sp)
 ; MMR6: lw $[[T2:[0-9]+]], 48($sp)
diff --git a/llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll b/llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll
index fa5916aa98e1..c72523f35937 100644
--- a/llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll
+++ b/llvm/test/CodeGen/PowerPC/MCSE-caller-preserved-reg.ll
@@ -20,9 +20,9 @@ define noalias i8* @_ZN2CC3funEv(%class.CC* %this) {
 ; CHECK-NEXT: .cfi_def_cfa_offset 48
 ; CHECK-NEXT: .cfi_offset lr, 16
 ; CHECK-NEXT: .cfi_offset r30, -16
-; CHECK-NEXT: ld 12, 0(3)
 ; CHECK-NEXT: std 30, 32(1)
 ; CHECK-NEXT: mr 30, 3
+; CHECK-NEXT: ld 12, 0(30)
 ; CHECK-NEXT: std 2, 24(1)
 ; CHECK-NEXT: mtctr 12
 ; CHECK-NEXT: bctrl
diff --git a/llvm/test/CodeGen/PowerPC/fma-mutate.ll b/llvm/test/CodeGen/PowerPC/fma-mutate.ll
index 1d4695b31810..633afa45115a 100644
--- a/llvm/test/CodeGen/PowerPC/fma-mutate.ll
+++ b/llvm/test/CodeGen/PowerPC/fma-mutate.ll
@@ -14,8 +14,7 @@ define double @foo3(double %a) nounwind {
   ret double %r
 
 ; CHECK: @foo3
-; CHECK: fmr [[REG:[0-9]+]], [[REG2:[0-9]+]]
-; CHECK: xsnmsubadp [[REG]], {{[0-9]+}}, [[REG2]]
+; CHECK: xsnmsubadp [[REG:[0-9]+]], {{[0-9]+}}, [[REG]]
 ; CHECK: xsmaddmdp
 ; CHECK: xsmaddadp
 }
diff --git a/llvm/test/CodeGen/PowerPC/gpr-vsr-spill.ll b/llvm/test/CodeGen/PowerPC/gpr-vsr-spill.ll
index be9df368df42..218241ae0a79 100644
--- a/llvm/test/CodeGen/PowerPC/gpr-vsr-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/gpr-vsr-spill.ll
@@ -16,8 +16,8 @@ if.end: ; preds = %if.then, %entry
   ret i32 %e.0
 ; CHECK: @foo
 ; CHECK: mr [[NEWREG:[0-9]+]], 3
-; CHECK: mr [[REG1:[0-9]+]], 4
 ; CHECK: mtvsrd [[NEWREG2:[0-9]+]], 4
+; CHECK: mffprd [[REG1:[0-9]+]], [[NEWREG2]]
 ; CHECK: add {{[0-9]+}}, [[NEWREG]], [[REG1]]
 ; CHECK: mffprd [[REG2:[0-9]+]], [[NEWREG2]]
 ; CHECK: add {{[0-9]+}}, [[REG2]], [[NEWREG]]
diff --git a/llvm/test/CodeGen/PowerPC/licm-remat.ll b/llvm/test/CodeGen/PowerPC/licm-remat.ll
index 0473709bb576..e72a8b0cd3e4 100644
--- a/llvm/test/CodeGen/PowerPC/licm-remat.ll
+++ b/llvm/test/CodeGen/PowerPC/licm-remat.ll
@@ -20,8 +20,8 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r
 define linkonce_odr void @ZN6snappyDecompressor_(%"class.snappy::SnappyDecompressor"* %this, %"class.snappy::SnappyIOVecWriter"* %writer) {
 ; CHECK-LABEL: ZN6snappyDecompressor_:
 ; CHECK: # %bb.0: # %entry
-; CHECK: addis 23, 2, _ZN6snappy8internalL8wordmaskE@toc@ha
-; CHECK-DAG: addi 25, 23, _ZN6snappy8internalL8wordmaskE@toc@l
+; CHECK: addis 3, 2, _ZN6snappy8internalL8wordmaskE@toc@ha
+; CHECK-DAG: addi 25, 3, _ZN6snappy8internalL8wordmaskE@toc@l
 ; CHECK-DAG: addis 5, 2, _ZN6snappy8internalL10char_tableE@toc@ha
 ; CHECK-DAG: addi 24, 5, _ZN6snappy8internalL10char_tableE@toc@l
 ; CHECK: b .LBB0_2
diff --git a/llvm/test/CodeGen/PowerPC/opt-li-add-to-addi.ll b/llvm/test/CodeGen/PowerPC/opt-li-add-to-addi.ll
index 67fd5d3e3085..43d8445b7cfa 100644
--- a/llvm/test/CodeGen/PowerPC/opt-li-add-to-addi.ll
+++ b/llvm/test/CodeGen/PowerPC/opt-li-add-to-addi.ll
@@ -3,7 +3,7 @@
 
 define i64 @testOptimizeLiAddToAddi(i64 %a) {
 ; CHECK-LABEL: testOptimizeLiAddToAddi:
-; CHECK: addi 3, 3, 2444
+; CHECK: addi 3, 30, 2444
 ; CHECK: bl callv
 ; CHECK: addi 3, 30, 234
 ; CHECK: bl call
diff --git a/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll b/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll
index a4e232941c88..c9019983e933 100644
--- a/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll
+++ b/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll
@@ -25,7 +25,7 @@ target triple = "powerpc64le-grtev4-linux-gnu"
 ;CHECK-LABEL: straight_test:
 ; test1 may have been merged with entry
 ;CHECK: mr [[TAGREG:[0-9]+]], 3
-;CHECK: andi. {{[0-9]+}}, [[TAGREG:[0-9]+]], 1
+;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1
 ;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]]
 ;CHECK-NEXT: # %test2
 ;CHECK-NEXT: rlwinm. {{[0-9]+}}, [[TAGREG]], 0, 30, 30
diff --git a/llvm/test/CodeGen/SPARC/32abi.ll b/llvm/test/CodeGen/SPARC/32abi.ll
index 985e77b86fd7..3807f84d4e92 100644
--- a/llvm/test/CodeGen/SPARC/32abi.ll
+++ b/llvm/test/CodeGen/SPARC/32abi.ll
@@ -156,9 +156,9 @@ define double @floatarg(double %a0, ; %i0,%i1
 ; HARD-NEXT: std %o0, [%sp+96]
 ; HARD-NEXT: st %o1, [%sp+92]
 ; HARD-NEXT: mov %i0, %o2
-; HARD-NEXT: mov %i1, %o3
+; HARD-NEXT: mov %o0, %o3
 ; HARD-NEXT: mov %o1, %o4
-; HARD-NEXT: mov %i1, %o5
+; HARD-NEXT: mov %o0, %o5
 ; HARD-NEXT: call floatarg
 ; HARD: std %f0, [%i4]
 ; SOFT: st %i0, [%sp+104]
diff --git a/llvm/test/CodeGen/SPARC/atomics.ll b/llvm/test/CodeGen/SPARC/atomics.ll
index ac095e60fa06..5e608e728c37 100644
--- a/llvm/test/CodeGen/SPARC/atomics.ll
+++ b/llvm/test/CodeGen/SPARC/atomics.ll
@@ -235,9 +235,8 @@ entry:
 
 ; CHECK-LABEL: test_load_add_i32
 ; CHECK: membar
-; CHECK: mov [[U:%[gilo][0-7]]], [[V:%[gilo][0-7]]]
-; CHECK: add [[U:%[gilo][0-7]]], %o1, [[V2:%[gilo][0-7]]]
-; CHECK: cas [%o0], [[V]], [[V2]]
+; CHECK: add [[V:%[gilo][0-7]]], %o1, [[U:%[gilo][0-7]]]
+; CHECK: cas [%o0], [[V]], [[U]]
 ; CHECK: membar
 define zeroext i32 @test_load_add_i32(i32* %p, i32 zeroext %v) {
 entry:
diff --git a/llvm/test/CodeGen/SystemZ/vec-sub-01.ll b/llvm/test/CodeGen/SystemZ/vec-sub-01.ll
index cc276c3b7697..2ee251a812b7 100644
--- a/llvm/test/CodeGen/SystemZ/vec-sub-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-sub-01.ll
@@ -46,12 +46,12 @@ define <4 x float> @f5(<4 x float> %val1, <4 x float> %val2) {
 ; CHECK-LABEL: f5:
 ; CHECK-DAG: vlr %v[[A1:[0-5]]], %v24
 ; CHECK-DAG: vlr %v[[A2:[0-5]]], %v26
-; CHECK-DAG: vrepf %v[[B1:[0-5]]], %v24, 1
-; CHECK-DAG: vrepf %v[[B2:[0-5]]], %v26, 1
-; CHECK-DAG: vrepf %v[[C1:[0-5]]], %v24, 2
-; CHECK-DAG: vrepf %v[[C2:[0-5]]], %v26, 2
-; CHECK-DAG: vrepf %v[[D1:[0-5]]], %v24, 3
-; CHECK-DAG: vrepf %v[[D2:[0-5]]], %v26, 3
+; CHECK-DAG: vrepf %v[[B1:[0-5]]], %v[[A1]], 1
+; CHECK-DAG: vrepf %v[[B2:[0-5]]], %v[[A2]], 1
+; CHECK-DAG: vrepf %v[[C1:[0-5]]], %v[[A1]], 2
+; CHECK-DAG: vrepf %v[[C2:[0-5]]], %v[[A2]], 2
+; CHECK-DAG: vrepf %v[[D1:[0-5]]], %v[[A1]], 3
+; CHECK-DAG: vrepf %v[[D2:[0-5]]], %v[[A2]], 3
 ; CHECK-DAG: sebr %f[[A1]], %f[[A2]]
 ; CHECK-DAG: sebr %f[[B1]], %f[[B2]]
 ; CHECK-DAG: sebr %f[[C1]], %f[[C2]]
diff --git a/llvm/test/CodeGen/Thumb/pr35836.ll b/llvm/test/CodeGen/Thumb/pr35836.ll
index 743c73c799d3..7765e66658a0 100644
--- a/llvm/test/CodeGen/Thumb/pr35836.ll
+++ b/llvm/test/CodeGen/Thumb/pr35836.ll
@@ -37,13 +37,13 @@ while.body:
 ; CHECK: adds r3, r0, r1
 ; CHECK: push {r5}
 ; CHECK: pop {r1}
-; CHECK: adcs r1, r5
+; CHECK: adcs r1, r1
 ; CHECK: ldr r0, [sp, #12] @ 4-byte Reload
 ; CHECK: ldr r2, [sp, #8] @ 4-byte Reload
 ; CHECK: adds r2, r0, r2
 ; CHECK: push {r5}
 ; CHECK: pop {r4}
-; CHECK: adcs r4, r5
+; CHECK: adcs r4, r4
 ; CHECK: adds r0, r2, r5
 ; CHECK: push {r3}
 ; CHECK: pop {r0}
diff --git a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
index 471626251d56..07d724546e9f 100644
--- a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll
@@ -598,7 +598,7 @@ declare void @abort() #0
 define i32 @b_to_bx(i32 %value) {
 ; CHECK-LABEL: b_to_bx:
 ; DISABLE: push {r7, lr}
-; CHECK: cmp r0, #49
+; CHECK: cmp r1, #49
 ; CHECK-NEXT: bgt [[ELSE_LABEL:LBB[0-9_]+]]
 ; ENABLE: push {r7, lr}
 
diff --git a/llvm/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll b/llvm/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
index 0805f7a3704a..4bc6b1a53d9d 100644
--- a/llvm/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
+++ b/llvm/test/CodeGen/X86/2006-03-01-InstrSchedBug.ll
@@ -7,7 +7,7 @@ define i32 @f(i32 %a, i32 %b) {
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: imull %ecx, %edx
+; CHECK-NEXT: imull %edx, %edx
 ; CHECK-NEXT: imull %eax, %ecx
 ; CHECK-NEXT: imull %eax, %eax
 ; CHECK-NEXT: addl %edx, %eax
diff --git a/llvm/test/CodeGen/X86/arg-copy-elide.ll b/llvm/test/CodeGen/X86/arg-copy-elide.ll
index 603e50ff30a3..126f5a1c7976 100644
--- a/llvm/test/CodeGen/X86/arg-copy-elide.ll
+++ b/llvm/test/CodeGen/X86/arg-copy-elide.ll
@@ -106,7 +106,7 @@ entry:
 ; CHECK-DAG: movl %edx, %[[r1:[^ ]*]]
 ; CHECK-DAG: movl 8(%ebp), %[[r2:[^ ]*]]
 ; CHECK-DAG: movl %[[r2]], 4(%esp)
-; CHECK-DAG: movl %edx, (%esp)
+; CHECK-DAG: movl %[[r1]], (%esp)
 ; CHECK: movl %esp, %[[reg:[^ ]*]]
 ; CHECK: pushl %[[reg]]
 ; CHECK: calll _addrof_i64
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll
index c6667340c7cb..e82db7525f5e 100644
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -12,11 +12,11 @@ define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>*
 ; CHECK-NEXT: movq %rdx, %r14
 ; CHECK-NEXT: movq %rsi, %r15
 ; CHECK-NEXT: movq %rdi, %rbx
-; CHECK-NEXT: vmovaps (%rdi), %ymm0
+; CHECK-NEXT: vmovaps (%rbx), %ymm0
 ; CHECK-NEXT: vmovups %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK-NEXT: vmovaps (%rsi), %ymm1
+; CHECK-NEXT: vmovaps (%r15), %ymm1
 ; CHECK-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
-; CHECK-NEXT: vmovaps (%rdx), %ymm2
+; CHECK-NEXT: vmovaps (%r14), %ymm2
 ; CHECK-NEXT: vmovups %ymm2, (%rsp) # 32-byte Spill
 ; CHECK-NEXT: callq dummy
 ; CHECK-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
diff --git a/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll b/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll
index b48bf4fe2549..49c98bb5457b 100644
--- a/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll
+++ b/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll
@@ -9,10 +9,10 @@ define void @bar__512(<16 x i32>* %var) #0 {
 ; CHECK-NEXT: pushq %rbx
 ; CHECK-NEXT: subq $112, %rsp
 ; CHECK-NEXT: movq %rdi, %rbx
-; CHECK-NEXT: vmovups (%rdi), %zmm0
+; CHECK-NEXT: vmovups (%rbx), %zmm0
 ; CHECK-NEXT: vmovups %zmm0, (%rsp) ## 64-byte Spill
 ; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %zmm1
-; CHECK-NEXT: vmovaps %zmm1, (%rdi)
+; CHECK-NEXT: vmovaps %zmm1, (%rbx)
 ; CHECK-NEXT: callq _Print__512
 ; CHECK-NEXT: vmovups (%rsp), %zmm0 ## 64-byte Reload
 ; CHECK-NEXT: callq _Print__512
diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index b313ee47aa0f..448a040403c3 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -355,7 +355,7 @@ define i32 @test12(i32 %a1, i32 %a2, i32 %b1) {
 ; KNL_X32-NEXT: movl %edi, (%esp)
 ; KNL_X32-NEXT: calll _test11
 ; KNL_X32-NEXT: movl %eax, %ebx
-; KNL_X32-NEXT: movzbl %al, %eax
+; KNL_X32-NEXT: movzbl %bl, %eax
 ; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; KNL_X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
 ; KNL_X32-NEXT: movl %edi, (%esp)
diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
index 61718fb8c04e..1e64330107a6 100644
--- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -148,7 +148,7 @@ define <16 x float> @testf16_regs(<16 x float> %a, <16 x float> %b) nounwind {
 ; X64-NEXT: andq $-64, %rsp
 ; X64-NEXT: subq $128, %rsp
 ; X64-NEXT: vmovaps %zmm1, %zmm16
-; X64-NEXT: vaddps %zmm1, %zmm0, %zmm0
+; X64-NEXT: vaddps %zmm16, %zmm0, %zmm0
 ; X64-NEXT: movq %rsp, %rdi
 ; X64-NEXT: callq _func_float16_ptr
 ; X64-NEXT: vaddps %zmm16, %zmm0, %zmm0
diff --git a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
index 58636920ea4a..ebbf7b672670 100644
--- a/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
+++ b/llvm/test/CodeGen/X86/avx512-regcall-NoMask.ll
@@ -650,7 +650,7 @@ define x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i32> %a) {
 ; X32-NEXT: subl $24, %esp
 ; X32-NEXT: vmovups %xmm4, (%esp) # 16-byte Spill
 ; X32-NEXT: vmovdqa %xmm0, %xmm4
-; X32-NEXT: vmovdqa %xmm0, %xmm1
+; X32-NEXT: vmovdqa %xmm4, %xmm1
 ; X32-NEXT: calll _test_argRet128Vector
 ; X32-NEXT: vmovdqa32 %xmm4, %xmm0 {%k1}
 ; X32-NEXT: vmovups (%esp), %xmm4 # 16-byte Reload
@@ -668,7 +668,7 @@ define x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i32> %a) {
 ; WIN64-NEXT: .seh_savexmm 8, 0
 ; WIN64-NEXT: .seh_endprologue
 ; WIN64-NEXT: vmovdqa %xmm0, %xmm8
-; WIN64-NEXT: vmovdqa %xmm0, %xmm1
+; WIN64-NEXT: vmovdqa %xmm8, %xmm1
 ; WIN64-NEXT: callq test_argRet128Vector
 ; WIN64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1}
 ; WIN64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
@@ -689,7 +689,7 @@ define x86_regcallcc <4 x i32> @test_CallargRet128Vector(<4 x i32> %a) {
 ; LINUXOSX64-NEXT: .cfi_offset %rsp, -16
 ; LINUXOSX64-NEXT: .cfi_offset %xmm8, -32
 ; LINUXOSX64-NEXT: vmovdqa %xmm0, %xmm8
-; LINUXOSX64-NEXT: vmovdqa %xmm0, %xmm1
+; LINUXOSX64-NEXT: vmovdqa %xmm8, %xmm1
 ; LINUXOSX64-NEXT: callq test_argRet128Vector
 ; LINUXOSX64-NEXT: vmovdqa32 %xmm8, %xmm0 {%k1}
 ; LINUXOSX64-NEXT: vmovaps (%rsp), %xmm8 # 16-byte Reload
@@ -908,12 +908,12 @@ define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a
 ; X32-NEXT: subl $20, %esp
 ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl %edi, %esi
-; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl %edx, %ebx
-; X32-NEXT: movl %edx, (%esp) # 4-byte Spill
+; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill
 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: movl %eax, %edx
-; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
 ; X32-NEXT: subl %ecx, %edx
 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
 ; X32-NEXT: movl %edi, %ebp
diff --git a/llvm/test/CodeGen/X86/buildvec-insertvec.ll b/llvm/test/CodeGen/X86/buildvec-insertvec.ll
index e176986c2b68..b96233aa7880 100644
--- a/llvm/test/CodeGen/X86/buildvec-insertvec.ll
+++ b/llvm/test/CodeGen/X86/buildvec-insertvec.ll
@@ -38,7 +38,7 @@ define <4 x float> @test_negative_zero_1(<4 x float> %A) {
 ; SSE2-LABEL: test_negative_zero_1:
 ; SSE2: # %bb.0: # %entry
 ; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE2-NEXT: xorps %xmm2, %xmm2
 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
 ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
diff --git a/llvm/test/CodeGen/X86/combine-fcopysign.ll b/llvm/test/CodeGen/X86/combine-fcopysign.ll
index 72b7f76ec4d9..cd4c5eca54ac 100644
--- a/llvm/test/CodeGen/X86/combine-fcopysign.ll
+++ b/llvm/test/CodeGen/X86/combine-fcopysign.ll
@@ -197,8 +197,8 @@ define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float
 ; SSE-NEXT: cvtss2sd %xmm2, %xmm4
 ; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
 ; SSE-NEXT: movaps %xmm2, %xmm6
-; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm2[1],xmm6[1]
-; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm2[2,3]
+; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm6[1,1]
+; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
 ; SSE-NEXT: movaps {{.*#+}} xmm7
 ; SSE-NEXT: movaps %xmm0, %xmm2
 ; SSE-NEXT: andps %xmm7, %xmm2
@@ -213,7 +213,7 @@ define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float
 ; SSE-NEXT: orps %xmm0, %xmm4
 ; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0]
 ; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE-NEXT: andps %xmm7, %xmm0
 ; SSE-NEXT: cvtss2sd %xmm3, %xmm3
 ; SSE-NEXT: andps %xmm8, %xmm3
@@ -260,7 +260,7 @@ define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x doubl
 ; SSE-NEXT: orps %xmm6, %xmm1
 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; SSE-NEXT: movaps %xmm3, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: andps %xmm5, %xmm1
 ; SSE-NEXT: xorps %xmm6, %xmm6
 ; SSE-NEXT: cvtsd2ss %xmm2, %xmm6
diff --git a/llvm/test/CodeGen/X86/combine-shl.ll b/llvm/test/CodeGen/X86/combine-shl.ll
index 4184de12a3cf..e5a39a89acce 100644
--- a/llvm/test/CodeGen/X86/combine-shl.ll
+++ b/llvm/test/CodeGen/X86/combine-shl.ll
@@ -188,7 +188,7 @@ define <8 x i32> @combine_vec_shl_ext_shl0(<8 x i16> %x) {
 ; SSE-LABEL: combine_vec_shl_ext_shl0:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movdqa %xmm0, %xmm1
-; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
 ; SSE-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
 ; SSE-NEXT: pslld $20, %xmm1
 ; SSE-NEXT: pslld $20, %xmm0
diff --git a/llvm/test/CodeGen/X86/complex-fastmath.ll b/llvm/test/CodeGen/X86/complex-fastmath.ll
index bae1ac35d146..9c02ac6667f6 100644
--- a/llvm/test/CodeGen/X86/complex-fastmath.ll
+++ b/llvm/test/CodeGen/X86/complex-fastmath.ll
@@ -14,7 +14,7 @@ define <2 x float> @complex_square_f32(<2 x float>) #0 {
 ; SSE: # %bb.0:
 ; SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
 ; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: addss %xmm0, %xmm2
+; SSE-NEXT: addss %xmm2, %xmm2
 ; SSE-NEXT: mulss %xmm1, %xmm2
 ; SSE-NEXT: mulss %xmm0, %xmm0
 ; SSE-NEXT: mulss %xmm1, %xmm1
@@ -58,9 +58,9 @@ define <2 x double> @complex_square_f64(<2 x double>) #0 {
 ; SSE-LABEL: complex_square_f64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
 ; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: addsd %xmm0, %xmm2
+; SSE-NEXT: addsd %xmm2, %xmm2
 ; SSE-NEXT: mulsd %xmm1, %xmm2
 ; SSE-NEXT: mulsd %xmm0, %xmm0
 ; SSE-NEXT: mulsd %xmm1, %xmm1
@@ -161,9 +161,9 @@ define <2 x double> @complex_mul_f64(<2 x double>, <2 x double>) #0 {
 ; SSE-LABEL: complex_mul_f64:
 ; SSE: # %bb.0:
 ; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1]
 ; SSE-NEXT: movaps %xmm1, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1]
 ; SSE-NEXT: movaps %xmm3, %xmm4
 ; SSE-NEXT: mulsd %xmm0, %xmm4
 ; SSE-NEXT: mulsd %xmm1, %xmm0
diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll
index cc2dc1b1d094..ed6c0fafe2ef 100644
--- a/llvm/test/CodeGen/X86/divide-by-constant.ll
+++ b/llvm/test/CodeGen/X86/divide-by-constant.ll
@@ -312,7 +312,7 @@ define i64 @PR23590(i64 %x) nounwind {
 ; X64: # %bb.0: # %entry
 ; X64-NEXT: movq %rdi, %rcx
 ; X64-NEXT: movabsq $6120523590596543007, %rdx # imm = 0x54F077C718E7C21F
-; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: movq %rcx, %rax
 ; X64-NEXT: mulq %rdx
 ; X64-NEXT: shrq $12, %rdx
 ; X64-NEXT: imulq $12345, %rdx, %rax # imm = 0x3039
diff --git a/llvm/test/CodeGen/X86/fixup-sfb.ll b/llvm/test/CodeGen/X86/fixup-sfb.ll
index e73b0bb447f1..150ff4d29294 100644
--- a/llvm/test/CodeGen/X86/fixup-sfb.ll
+++ b/llvm/test/CodeGen/X86/fixup-sfb.ll
@@ -4,9 +4,9 @@
 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 | FileCheck %s -check-prefix=CHECK-AVX2
 ; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx | FileCheck %s -check-prefix=CHECK-AVX512
 
-; RUN: llc < %s -mtriple=i686-linux
-; RUN: llc < %s -mtriple=i686-linux --disable-fixup-SFB
-; RUN: llc < %s -mtriple=i686-linux -mattr sse4
+; RUN: llc < %s -mtriple=i686-linux
+; RUN: llc < %s -mtriple=i686-linux --disable-fixup-SFB
+; RUN: llc < %s -mtriple=i686-linux -mattr sse4
 ; RUN: llc < %s -mtriple=i686-linux -mattr avx512
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -814,7 +814,7 @@ define void @test_limit_all(%struct.S* %s1, %struct.S* nocapture %s2, i32 %x, %s
 ; CHECK-NEXT: movl %edx, %ebp
 ; CHECK-NEXT: movq %rsi, %r12
 ; CHECK-NEXT: movq %rdi, %rbx
-; CHECK-NEXT: movl %r9d, 12(%rdi)
+; CHECK-NEXT: movl %r9d, 12(%rbx)
 ; CHECK-NEXT: callq bar
 ; CHECK-NEXT: cmpl $18, %ebp
 ; CHECK-NEXT: jl .LBB9_2
@@ -856,7 +856,7 @@ define void @test_limit_all(%struct.S* %s1, %struct.S* nocapture %s2, i32 %x, %s
 ; DISABLED-NEXT: movl %edx, %ebp
 ; DISABLED-NEXT: movq %rsi, %r12
 ; DISABLED-NEXT: movq %rdi, %rbx
-; DISABLED-NEXT: movl %r9d, 12(%rdi)
+; DISABLED-NEXT: movl %r9d, 12(%rbx)
 ; DISABLED-NEXT: callq bar
 ; DISABLED-NEXT: cmpl $18, %ebp
 ; DISABLED-NEXT: jl .LBB9_2
@@ -898,7 +898,7 @@ define void @test_limit_all(%struct.S* %s1, %struct.S* nocapture %s2, i32 %x, %s
 ; CHECK-AVX2-NEXT: movl %edx, %ebp
 ; CHECK-AVX2-NEXT: movq %rsi, %r12
 ; CHECK-AVX2-NEXT: movq %rdi, %rbx
-; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi)
+; CHECK-AVX2-NEXT: movl %r9d, 12(%rbx)
 ; CHECK-AVX2-NEXT: callq bar
 ; CHECK-AVX2-NEXT: cmpl $18, %ebp
 ; CHECK-AVX2-NEXT: jl .LBB9_2
@@ -940,7 +940,7 @@ define void @test_limit_all(%struct.S* %s1, %struct.S* nocapture %s2, i32 %x, %s
 ; CHECK-AVX512-NEXT: movl %edx, %ebp
 ; CHECK-AVX512-NEXT: movq %rsi, %r12
 ; CHECK-AVX512-NEXT: movq %rdi, %rbx
-; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi)
+; CHECK-AVX512-NEXT: movl %r9d, 12(%rbx)
 ; CHECK-AVX512-NEXT: callq bar
 ; CHECK-AVX512-NEXT: cmpl $18, %ebp
 ; CHECK-AVX512-NEXT: jl .LBB9_2
@@ -1004,7 +1004,7 @@ define void @test_limit_one_pred(%struct.S* %s1, %struct.S* nocapture %s2, i32 %
 ; CHECK-NEXT: movq %rcx, %r15
 ; CHECK-NEXT: movq %rsi, %r14
 ; CHECK-NEXT: movq %rdi, %rbx
-; CHECK-NEXT: movl %r9d, 12(%rdi)
+; CHECK-NEXT: movl %r9d, 12(%rbx)
 ; CHECK-NEXT: cmpl $18, %edx
 ; CHECK-NEXT: jl .LBB10_2
 ; CHECK-NEXT: # %bb.1: # %if.then
@@ -1047,7 +1047,7 @@ define void @test_limit_one_pred(%struct.S* %s1, %struct.S* nocapture %s2, i32 %
 ; DISABLED-NEXT: movq %rcx, %r14
 ; DISABLED-NEXT: movq %rsi, %r12
 ; DISABLED-NEXT: movq %rdi, %rbx
-; DISABLED-NEXT: movl %r9d, 12(%rdi)
+; DISABLED-NEXT: movl %r9d, 12(%rbx)
 ; DISABLED-NEXT: cmpl $18, %edx
 ; DISABLED-NEXT: jl .LBB10_2
 ; DISABLED-NEXT: # %bb.1: # %if.then
@test_limit_one_pred(%struct.S* %s1, %struct.S* nocapture %s2, i32 % ; CHECK-AVX2-NEXT: movq %rcx, %r15 ; CHECK-AVX2-NEXT: movq %rsi, %r14 ; CHECK-AVX2-NEXT: movq %rdi, %rbx -; CHECK-AVX2-NEXT: movl %r9d, 12(%rdi) +; CHECK-AVX2-NEXT: movl %r9d, 12(%rbx) ; CHECK-AVX2-NEXT: cmpl $18, %edx ; CHECK-AVX2-NEXT: jl .LBB10_2 ; CHECK-AVX2-NEXT: # %bb.1: # %if.then @@ -1129,7 +1129,7 @@ define void @test_limit_one_pred(%struct.S* %s1, %struct.S* nocapture %s2, i32 % ; CHECK-AVX512-NEXT: movq %rcx, %r15 ; CHECK-AVX512-NEXT: movq %rsi, %r14 ; CHECK-AVX512-NEXT: movq %rdi, %rbx -; CHECK-AVX512-NEXT: movl %r9d, 12(%rdi) +; CHECK-AVX512-NEXT: movl %r9d, 12(%rbx) ; CHECK-AVX512-NEXT: cmpl $18, %edx ; CHECK-AVX512-NEXT: jl .LBB10_2 ; CHECK-AVX512-NEXT: # %bb.1: # %if.then diff --git a/llvm/test/CodeGen/X86/fmaxnum.ll b/llvm/test/CodeGen/X86/fmaxnum.ll index 665fb708e4b0..ebfbd064572a 100644 --- a/llvm/test/CodeGen/X86/fmaxnum.ll +++ b/llvm/test/CodeGen/X86/fmaxnum.ll @@ -18,7 +18,7 @@ declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>) ; CHECK-LABEL: @test_fmaxf ; SSE: movaps %xmm0, %xmm2 -; SSE-NEXT: cmpunordss %xmm0, %xmm2 +; SSE-NEXT: cmpunordss %xmm2, %xmm2 ; SSE-NEXT: movaps %xmm2, %xmm3 ; SSE-NEXT: andps %xmm1, %xmm3 ; SSE-NEXT: maxss %xmm0, %xmm1 @@ -47,7 +47,7 @@ define float @test_fmaxf_minsize(float %x, float %y) minsize { ; CHECK-LABEL: @test_fmax ; SSE: movapd %xmm0, %xmm2 -; SSE-NEXT: cmpunordsd %xmm0, %xmm2 +; SSE-NEXT: cmpunordsd %xmm2, %xmm2 ; SSE-NEXT: movapd %xmm2, %xmm3 ; SSE-NEXT: andpd %xmm1, %xmm3 ; SSE-NEXT: maxsd %xmm0, %xmm1 @@ -74,7 +74,7 @@ define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) { ; CHECK-LABEL: @test_intrinsic_fmaxf ; SSE: movaps %xmm0, %xmm2 -; SSE-NEXT: cmpunordss %xmm0, %xmm2 +; SSE-NEXT: cmpunordss %xmm2, %xmm2 ; SSE-NEXT: movaps %xmm2, %xmm3 ; SSE-NEXT: andps %xmm1, %xmm3 ; SSE-NEXT: maxss %xmm0, %xmm1 @@ -95,7 +95,7 @@ define float @test_intrinsic_fmaxf(float %x, float %y) { ; CHECK-LABEL: @test_intrinsic_fmax ; SSE: movapd %xmm0, %xmm2 -; SSE-NEXT: cmpunordsd %xmm0, %xmm2 +; SSE-NEXT: cmpunordsd %xmm2, %xmm2 ; SSE-NEXT: movapd %xmm2, %xmm3 ; SSE-NEXT: andpd %xmm1, %xmm3 ; SSE-NEXT: maxsd %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/fmf-flags.ll b/llvm/test/CodeGen/X86/fmf-flags.ll index ca520b3d6759..00c73c1ffb86 100644 --- a/llvm/test/CodeGen/X86/fmf-flags.ll +++ b/llvm/test/CodeGen/X86/fmf-flags.ll @@ -30,7 +30,7 @@ define float @fast_fmuladd_opts(float %a , float %b , float %c) { ; X64-LABEL: fast_fmuladd_opts: ; X64: # %bb.0: ; X64-NEXT: movaps %xmm0, %xmm1 -; X64-NEXT: addss %xmm0, %xmm1 +; X64-NEXT: addss %xmm1, %xmm1 ; X64-NEXT: addss %xmm0, %xmm1 ; X64-NEXT: movaps %xmm1, %xmm0 ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/fminnum.ll b/llvm/test/CodeGen/X86/fminnum.ll index e0e5df702df6..afe8b804f267 100644 --- a/llvm/test/CodeGen/X86/fminnum.ll +++ b/llvm/test/CodeGen/X86/fminnum.ll @@ -18,7 +18,7 @@ declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>) ; CHECK-LABEL: @test_fminf ; SSE: movaps %xmm0, %xmm2 -; SSE-NEXT: cmpunordss %xmm0, %xmm2 +; SSE-NEXT: cmpunordss %xmm2, %xmm2 ; SSE-NEXT: movaps %xmm2, %xmm3 ; SSE-NEXT: andps %xmm1, %xmm3 ; SSE-NEXT: minss %xmm0, %xmm1 @@ -40,7 +40,7 @@ define float @test_fminf(float %x, float %y) { ; CHECK-LABEL: @test_fmin ; SSE: movapd %xmm0, %xmm2 -; SSE-NEXT: cmpunordsd %xmm0, %xmm2 +; SSE-NEXT: cmpunordsd %xmm2, %xmm2 ; SSE-NEXT: movapd %xmm2, %xmm3 ; SSE-NEXT: andpd %xmm1, %xmm3 ; SSE-NEXT: minsd %xmm0, %xmm1 @@ -67,7 +67,7 @@ define x86_fp80 @test_fminl(x86_fp80 %x, 
x86_fp80 %y) { ; CHECK-LABEL: @test_intrinsic_fminf ; SSE: movaps %xmm0, %xmm2 -; SSE-NEXT: cmpunordss %xmm0, %xmm2 +; SSE-NEXT: cmpunordss %xmm2, %xmm2 ; SSE-NEXT: movaps %xmm2, %xmm3 ; SSE-NEXT: andps %xmm1, %xmm3 ; SSE-NEXT: minss %xmm0, %xmm1 @@ -87,7 +87,7 @@ define float @test_intrinsic_fminf(float %x, float %y) { ; CHECK-LABEL: @test_intrinsic_fmin ; SSE: movapd %xmm0, %xmm2 -; SSE-NEXT: cmpunordsd %xmm0, %xmm2 +; SSE-NEXT: cmpunordsd %xmm2, %xmm2 ; SSE-NEXT: movapd %xmm2, %xmm3 ; SSE-NEXT: andpd %xmm1, %xmm3 ; SSE-NEXT: minsd %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/fp128-i128.ll b/llvm/test/CodeGen/X86/fp128-i128.ll index f61173755ce5..54e2aab37ecb 100644 --- a/llvm/test/CodeGen/X86/fp128-i128.ll +++ b/llvm/test/CodeGen/X86/fp128-i128.ll @@ -227,7 +227,7 @@ define fp128 @TestI128_4(fp128 %x) #0 { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subq $40, %rsp ; CHECK-NEXT: movaps %xmm0, %xmm1 -; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq $0, (%rsp) @@ -275,7 +275,7 @@ define fp128 @acosl(fp128 %x) #0 { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: subq $40, %rsp ; CHECK-NEXT: movaps %xmm0, %xmm1 -; CHECK-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movaps %xmm1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movq $0, (%rsp) diff --git a/llvm/test/CodeGen/X86/h-registers-1.ll b/llvm/test/CodeGen/X86/h-registers-1.ll index 2900475be7f9..43b086819a26 100644 --- a/llvm/test/CodeGen/X86/h-registers-1.ll +++ b/llvm/test/CodeGen/X86/h-registers-1.ll @@ -32,7 +32,8 @@ define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h) ; CHECK-NEXT: movzbl %ah, %eax ; CHECK-NEXT: movl {{[0-9]+}}(%rsp), %ebx ; CHECK-NEXT: movzbl %bh, %edi -; CHECK-NEXT: addq %r10, %rsi +; CHECK-NEXT: movq %r10, %r8 +; CHECK-NEXT: addq %r8, %rsi ; CHECK-NEXT: addq %r11, %rdx ; CHECK-NEXT: addq %rsi, %rdx ; CHECK-NEXT: addq %rbp, %rcx @@ -67,7 +68,8 @@ define i64 @foo(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h) ; GNUX32-NEXT: movzbl %ah, %eax ; GNUX32-NEXT: movl {{[0-9]+}}(%esp), %ebx ; GNUX32-NEXT: movzbl %bh, %edi -; GNUX32-NEXT: addq %r10, %rsi +; GNUX32-NEXT: movq %r10, %r8 +; GNUX32-NEXT: addq %r8, %rsi ; GNUX32-NEXT: addq %r11, %rdx ; GNUX32-NEXT: addq %rsi, %rdx ; GNUX32-NEXT: addq %rbp, %rcx diff --git a/llvm/test/CodeGen/X86/haddsub-2.ll b/llvm/test/CodeGen/X86/haddsub-2.ll index a83a0459a3a7..2b8b8c909d17 100644 --- a/llvm/test/CodeGen/X86/haddsub-2.ll +++ b/llvm/test/CodeGen/X86/haddsub-2.ll @@ -896,16 +896,16 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) { ; SSE-LABEL: not_a_hsub_2: ; SSE: # %bb.0: ; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: movaps %xmm0, %xmm3 -; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3] +; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE-NEXT: subss %xmm3, %xmm2 ; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] ; SSE-NEXT: subss %xmm3, %xmm0 ; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm1[2,3] +; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE-NEXT: movaps %xmm1, %xmm4 -; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1] ; SSE-NEXT: subss %xmm4, %xmm3 ; SSE-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] ; SSE-NEXT: 
subss %xmm4, %xmm1 @@ -953,10 +953,10 @@ define <2 x double> @not_a_hsub_3(<2 x double> %A, <2 x double> %B) { ; SSE-LABEL: not_a_hsub_3: ; SSE: # %bb.0: ; SSE-NEXT: movaps %xmm1, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: subsd %xmm2, %xmm1 ; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: subsd %xmm0, %xmm2 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSE-NEXT: movapd %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/haddsub-3.ll b/llvm/test/CodeGen/X86/haddsub-3.ll index 1a2863f0bdfc..a9206da5fe15 100644 --- a/llvm/test/CodeGen/X86/haddsub-3.ll +++ b/llvm/test/CodeGen/X86/haddsub-3.ll @@ -7,10 +7,10 @@ define float @pr26491(<4 x float> %a0) { ; SSE2-LABEL: pr26491: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3] +; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,3,3] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -19,7 +19,7 @@ define float @pr26491(<4 x float> %a0) { ; SSSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSSE3-NEXT: addps %xmm0, %xmm1 ; SSSE3-NEXT: movaps %xmm1, %xmm0 -; SSSE3-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSSE3-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] ; SSSE3-NEXT: addss %xmm1, %xmm0 ; SSSE3-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/haddsub-undef.ll b/llvm/test/CodeGen/X86/haddsub-undef.ll index 004fec5cdeba..d34f8985cff3 100644 --- a/llvm/test/CodeGen/X86/haddsub-undef.ll +++ b/llvm/test/CodeGen/X86/haddsub-undef.ll @@ -103,7 +103,7 @@ define <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: test5_undef: ; SSE: # %bb.0: ; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: addsd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -168,7 +168,7 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) { ; SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE-NEXT: addss %xmm0, %xmm1 ; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE-NEXT: addss %xmm2, %xmm0 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] diff --git a/llvm/test/CodeGen/X86/half.ll b/llvm/test/CodeGen/X86/half.ll index 7770b9936eae..896b358c0d99 100644 --- a/llvm/test/CodeGen/X86/half.ll +++ b/llvm/test/CodeGen/X86/half.ll @@ -386,7 +386,7 @@ define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 { ; CHECK-LIBCALL-NEXT: pushq %rbx ; CHECK-LIBCALL-NEXT: subq $48, %rsp ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx -; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi +; CHECK-LIBCALL-NEXT: movzwl (%rbx), %edi ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee ; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill ; CHECK-LIBCALL-NEXT: movzwl 2(%rbx), %edi @@ -472,7 +472,7 @@ define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 { ; CHECK-LIBCALL-NEXT: pushq %rbx ; CHECK-LIBCALL-NEXT: subq $16, %rsp ; CHECK-LIBCALL-NEXT: movq %rdi, %rbx -; CHECK-LIBCALL-NEXT: movzwl 4(%rdi), %edi +; CHECK-LIBCALL-NEXT: movzwl 4(%rbx), %edi ; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee ; CHECK-LIBCALL-NEXT: movss %xmm0, {{[0-9]+}}(%rsp) 
# 4-byte Spill ; CHECK-LIBCALL-NEXT: movzwl 6(%rbx), %edi @@ -657,7 +657,7 @@ define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 { ; CHECK-I686-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) # 16-byte Spill ; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp ; CHECK-I686-NEXT: movaps %xmm0, %xmm1 -; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] +; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; CHECK-I686-NEXT: movss %xmm1, (%esp) ; CHECK-I686-NEXT: calll __gnu_f2h_ieee ; CHECK-I686-NEXT: movw %ax, %si diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll index 4619e8ec1c73..9e62553dbea9 100644 --- a/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-smax.ll @@ -40,7 +40,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) { ; X86-SSE42-LABEL: test_reduce_v2i64: ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] ; X86-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 ; X86-SSE42-NEXT: movd %xmm2, %eax @@ -80,7 +80,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) { ; X64-SSE42-LABEL: test_reduce_v2i64: ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] ; X64-SSE42-NEXT: pcmpgtq %xmm2, %xmm0 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 ; X64-SSE42-NEXT: movq %xmm2, %rax diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll index 9728379eb969..af4b96b1f809 100644 --- a/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-smin.ll @@ -40,7 +40,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) { ; X86-SSE42-LABEL: test_reduce_v2i64: ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] ; X86-SSE42-NEXT: movdqa %xmm2, %xmm0 ; X86-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 ; X86-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 @@ -81,7 +81,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) { ; X64-SSE42-LABEL: test_reduce_v2i64: ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] ; X64-SSE42-NEXT: movdqa %xmm2, %xmm0 ; X64-SSE42-NEXT: pcmpgtq %xmm1, %xmm0 ; X64-SSE42-NEXT: blendvpd %xmm0, %xmm1, %xmm2 diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll index fe8612bdc452..b466fc495a13 100644 --- a/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-umax.ll @@ -40,7 +40,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) { ; X86-SSE42-LABEL: test_reduce_v2i64: ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648] ; X86-SSE42-NEXT: pxor %xmm3, %xmm0 ; X86-SSE42-NEXT: pxor %xmm2, %xmm3 @@ -86,7 +86,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) { ; X64-SSE42-LABEL: test_reduce_v2i64: ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; 
X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] ; X64-SSE42-NEXT: pxor %xmm3, %xmm0 ; X64-SSE42-NEXT: pxor %xmm2, %xmm3 @@ -1693,7 +1693,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) { ; X86-SSE2-NEXT: pxor %xmm4, %xmm1 ; X86-SSE2-NEXT: pmaxsw %xmm3, %xmm1 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm2 -; X86-SSE2-NEXT: pxor %xmm4, %xmm2 +; X86-SSE2-NEXT: pxor %xmm2, %xmm2 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1 ; X86-SSE2-NEXT: pxor %xmm0, %xmm2 ; X86-SSE2-NEXT: pmaxsw %xmm1, %xmm2 @@ -1771,7 +1771,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) { ; X64-SSE2-NEXT: pxor %xmm4, %xmm1 ; X64-SSE2-NEXT: pmaxsw %xmm3, %xmm1 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm2 -; X64-SSE2-NEXT: pxor %xmm4, %xmm2 +; X64-SSE2-NEXT: pxor %xmm2, %xmm2 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1 ; X64-SSE2-NEXT: pxor %xmm0, %xmm2 ; X64-SSE2-NEXT: pmaxsw %xmm1, %xmm2 diff --git a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll index 66dd8fd6a878..d1b4895836c6 100644 --- a/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll +++ b/llvm/test/CodeGen/X86/horizontal-reduce-umin.ll @@ -40,7 +40,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) { ; X86-SSE42-LABEL: test_reduce_v2i64: ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; X86-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm0 = [0,2147483648,0,2147483648] ; X86-SSE42-NEXT: movdqa %xmm1, %xmm3 ; X86-SSE42-NEXT: pxor %xmm0, %xmm3 @@ -87,7 +87,7 @@ define i64 @test_reduce_v2i64(<2 x i64> %a0) { ; X64-SSE42-LABEL: test_reduce_v2i64: ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: movdqa %xmm0, %xmm1 -; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] +; X64-SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm1[2,3,0,1] ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm0 = [9223372036854775808,9223372036854775808] ; X64-SSE42-NEXT: movdqa %xmm1, %xmm3 ; X64-SSE42-NEXT: pxor %xmm0, %xmm3 @@ -444,7 +444,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) { ; X86-SSE42: ## %bb.0: ; X86-SSE42-NEXT: movdqa %xmm0, %xmm2 ; X86-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [0,2147483648,0,2147483648] -; X86-SSE42-NEXT: movdqa %xmm0, %xmm4 +; X86-SSE42-NEXT: movdqa %xmm2, %xmm4 ; X86-SSE42-NEXT: pxor %xmm3, %xmm4 ; X86-SSE42-NEXT: movdqa %xmm1, %xmm0 ; X86-SSE42-NEXT: pxor %xmm3, %xmm0 @@ -543,7 +543,7 @@ define i64 @test_reduce_v4i64(<4 x i64> %a0) { ; X64-SSE42: ## %bb.0: ; X64-SSE42-NEXT: movdqa %xmm0, %xmm2 ; X64-SSE42-NEXT: movdqa {{.*#+}} xmm3 = [9223372036854775808,9223372036854775808] -; X64-SSE42-NEXT: movdqa %xmm0, %xmm4 +; X64-SSE42-NEXT: movdqa %xmm2, %xmm4 ; X64-SSE42-NEXT: pxor %xmm3, %xmm4 ; X64-SSE42-NEXT: movdqa %xmm1, %xmm0 ; X64-SSE42-NEXT: pxor %xmm3, %xmm0 @@ -1597,7 +1597,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) { ; X86-SSE2-NEXT: pxor %xmm4, %xmm1 ; X86-SSE2-NEXT: pminsw %xmm3, %xmm1 ; X86-SSE2-NEXT: movdqa %xmm4, %xmm2 -; X86-SSE2-NEXT: pxor %xmm4, %xmm2 +; X86-SSE2-NEXT: pxor %xmm2, %xmm2 ; X86-SSE2-NEXT: pxor %xmm2, %xmm1 ; X86-SSE2-NEXT: pxor %xmm0, %xmm2 ; X86-SSE2-NEXT: pminsw %xmm1, %xmm2 @@ -1666,7 +1666,7 @@ define i16 @test_reduce_v32i16(<32 x i16> %a0) { ; X64-SSE2-NEXT: pxor %xmm4, %xmm1 ; X64-SSE2-NEXT: pminsw %xmm3, %xmm1 ; X64-SSE2-NEXT: movdqa %xmm4, %xmm2 -; X64-SSE2-NEXT: pxor %xmm4, %xmm2 +; X64-SSE2-NEXT: pxor %xmm2, %xmm2 ; X64-SSE2-NEXT: pxor %xmm2, %xmm1 ; X64-SSE2-NEXT: pxor %xmm0, %xmm2 ; X64-SSE2-NEXT: pminsw %xmm1, %xmm2 diff --git 
a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll index 1c36d31c480b..b6ac8a18b40b 100644 --- a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll +++ b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll @@ -161,7 +161,6 @@ define void @testPR4459(x86_fp80 %a) { ; CHECK-NEXT: fstpt (%esp) ; CHECK-NEXT: calll _ceil ; CHECK-NEXT: fld %st(0) -; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: ## InlineAsm Start ; CHECK-NEXT: fistpl %st(0) ; CHECK-NEXT: ## InlineAsm End diff --git a/llvm/test/CodeGen/X86/ipra-local-linkage.ll b/llvm/test/CodeGen/X86/ipra-local-linkage.ll index 787b16f0d5b3..a394ed3e3858 100644 --- a/llvm/test/CodeGen/X86/ipra-local-linkage.ll +++ b/llvm/test/CodeGen/X86/ipra-local-linkage.ll @@ -24,7 +24,7 @@ define void @bar(i32 %X) { call void @foo() ; CHECK-LABEL: bar: ; CHECK: callq foo - ; CHECK-NEXT: movl %edi, %r15d + ; CHECK-NEXT: movl %eax, %r15d call void asm sideeffect "movl $0, %r12d", "{r15}~{r12}"(i32 %X) ret void } diff --git a/llvm/test/CodeGen/X86/localescape.ll b/llvm/test/CodeGen/X86/localescape.ll index a49af0898680..10ab8dd9672f 100644 --- a/llvm/test/CodeGen/X86/localescape.ll +++ b/llvm/test/CodeGen/X86/localescape.ll @@ -27,7 +27,7 @@ define void @print_framealloc_from_fp(i8* %fp) { ; X64-LABEL: print_framealloc_from_fp: ; X64: movq %rcx, %[[parent_fp:[a-z]+]] -; X64: movl .Lalloc_func$frame_escape_0(%rcx), %edx +; X64: movl .Lalloc_func$frame_escape_0(%[[parent_fp]]), %edx ; X64: leaq {{.*}}(%rip), %[[str:[a-z]+]] ; X64: movq %[[str]], %rcx ; X64: callq printf diff --git a/llvm/test/CodeGen/X86/machine-cp.ll b/llvm/test/CodeGen/X86/machine-cp.ll index 0f41d7984daf..94194716c4ef 100644 --- a/llvm/test/CodeGen/X86/machine-cp.ll +++ b/llvm/test/CodeGen/X86/machine-cp.ll @@ -8,7 +8,7 @@ define i32 @t1(i32 %a, i32 %b) nounwind { ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: movl %esi, %edx ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: testl %esi, %esi +; CHECK-NEXT: testl %edx, %edx ; CHECK-NEXT: je LBB0_1 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB0_2: ## %while.body @@ -59,7 +59,7 @@ define i32 @t3(i64 %a, i64 %b) nounwind { ; CHECK: ## %bb.0: ## %entry ; CHECK-NEXT: movq %rsi, %rdx ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: testq %rsi, %rsi +; CHECK-NEXT: testq %rdx, %rdx ; CHECK-NEXT: je LBB2_1 ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: LBB2_2: ## %while.body diff --git a/llvm/test/CodeGen/X86/mul-i1024.ll b/llvm/test/CodeGen/X86/mul-i1024.ll index 98ee8f1b8ee1..9980042a4ccc 100644 --- a/llvm/test/CodeGen/X86/mul-i1024.ll +++ b/llvm/test/CodeGen/X86/mul-i1024.ll @@ -38,7 +38,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movl %edx, %eax ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl %edi, -204(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -204(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -892(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 36(%eax), %eax @@ -47,7 +47,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: mull %edx ; X32-NEXT: movl %edx, -236(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl %eax, -304(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -304(%ebp) # 4-byte Spill ; X32-NEXT: addl %ecx, %edi ; X32-NEXT: movl %edi, -80(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %eax @@ -58,7 +58,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %ecx -; X32-NEXT: movl %edx, -124(%ebp) # 4-byte Spill 
+; X32-NEXT: movl %ecx, -124(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -184(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %edx ; X32-NEXT: movl -400(%ebp), %esi # 4-byte Reload @@ -72,7 +72,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movl %eax, -656(%ebp) # 4-byte Spill ; X32-NEXT: leal (%ebx,%edi), %eax ; X32-NEXT: movl %edx, %edi -; X32-NEXT: leal (%ecx,%edx), %edx +; X32-NEXT: leal (%ecx,%edi), %edx ; X32-NEXT: adcl %eax, %edx ; X32-NEXT: movl %edx, -700(%ebp) # 4-byte Spill ; X32-NEXT: seto %al @@ -123,7 +123,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: movl %ebx, -424(%ebp) # 4-byte Spill ; X32-NEXT: movl %edi, %ebx -; X32-NEXT: movl %edi, -256(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -256(%ebp) # 4-byte Spill ; X32-NEXT: movl -100(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl %eax, -80(%ebp) # 4-byte Folded Spill ; X32-NEXT: movl -204(%ebp), %eax # 4-byte Reload @@ -148,7 +148,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movzbl %bh, %eax ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl %eax, -72(%ebp) # 4-byte Spill +; X32-NEXT: movl %edi, -72(%ebp) # 4-byte Spill ; X32-NEXT: movl 12(%ebp), %eax ; X32-NEXT: movl 8(%eax), %eax ; X32-NEXT: movl %eax, -108(%ebp) # 4-byte Spill @@ -220,7 +220,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, -364(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %edx, -396(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -396(%ebp) # 4-byte Spill ; X32-NEXT: movl -324(%ebp), %edx # 4-byte Reload ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %eax, %edi @@ -252,7 +252,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %edi ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %edx, -84(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -84(%ebp) # 4-byte Spill ; X32-NEXT: movl 20(%ecx), %eax ; X32-NEXT: movl %eax, -252(%ebp) # 4-byte Spill ; X32-NEXT: mull %ebx @@ -303,7 +303,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movl -52(%ebp), %eax # 4-byte Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %edx, -56(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -56(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -780(%ebp) # 4-byte Spill ; X32-NEXT: movl -132(%ebp), %edx # 4-byte Reload ; X32-NEXT: movl %edx, %eax @@ -393,10 +393,10 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %eax, -160(%ebp) # 4-byte Spill +; X32-NEXT: movl %ecx, -160(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, -268(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %esi -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %esi, %eax ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl -264(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %ecx @@ -425,7 +425,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: adcl -60(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -592(%ebp) # 4-byte Spill ; X32-NEXT: movl %esi, %edx -; X32-NEXT: movl %esi, %eax +; X32-NEXT: movl %edx, %eax ; X32-NEXT: movl -116(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %eax ; X32-NEXT: movl %ebx, %eax @@ -533,7 +533,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; 
X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl %eax, -336(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -336(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl 52(%esi), %eax ; X32-NEXT: movl %eax, -144(%ebp) # 4-byte Spill @@ -559,7 +559,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movl -336(%ebp), %ebx # 4-byte Reload ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: movl %edi, %edx -; X32-NEXT: movl %edi, -176(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -176(%ebp) # 4-byte Spill ; X32-NEXT: adcl -360(%ebp), %edi # 4-byte Folded Reload ; X32-NEXT: addl %ecx, %ebx ; X32-NEXT: movl %ebx, -472(%ebp) # 4-byte Spill @@ -590,12 +590,12 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %edx, -384(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -384(%ebp) # 4-byte Spill ; X32-NEXT: movl -116(%ebp), %edi # 4-byte Reload ; X32-NEXT: movl %edi, %ecx ; X32-NEXT: movl %eax, %edx -; X32-NEXT: movl %eax, -480(%ebp) # 4-byte Spill -; X32-NEXT: addl %eax, %ecx +; X32-NEXT: movl %edx, -480(%ebp) # 4-byte Spill +; X32-NEXT: addl %edx, %ecx ; X32-NEXT: movl -84(%ebp), %ebx # 4-byte Reload ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: adcl %esi, %eax @@ -642,8 +642,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movl %eax, %ecx ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %edx, -496(%ebp) # 4-byte Spill -; X32-NEXT: movl %edx, %ecx +; X32-NEXT: movl %esi, -496(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, %ecx ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: movl %ecx, -992(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, %ecx @@ -761,7 +761,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl %eax, -484(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -484(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, -488(%ebp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: addl %esi, %eax @@ -793,7 +793,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: adcl -60(%ebp), %ebx # 4-byte Folded Reload ; X32-NEXT: movl %ebx, -928(%ebp) # 4-byte Spill ; X32-NEXT: movl 8(%ebp), %ecx -; X32-NEXT: movl 84(%ecx), %eax +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl 84(%eax), %eax ; X32-NEXT: movl %eax, -544(%ebp) # 4-byte Spill ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx @@ -870,7 +871,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl %eax, -556(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -556(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, -560(%ebp) # 4-byte Spill ; X32-NEXT: movl -524(%ebp), %eax # 4-byte Reload ; X32-NEXT: movl %eax, %ebx @@ -881,7 +882,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movl %ebx, -732(%ebp) # 4-byte Spill ; X32-NEXT: adcl %edi, %esi ; X32-NEXT: movl %esi, %edx -; X32-NEXT: movl %esi, -728(%ebp) # 4-byte Spill +; X32-NEXT: movl %edx, -728(%ebp) # 4-byte Spill ; X32-NEXT: addl -136(%ebp), %eax # 4-byte Folded Reload ; X32-NEXT: movl %eax, -712(%ebp) # 4-byte Spill ; X32-NEXT: movl -668(%ebp), %ecx # 4-byte Reload @@ -916,7 +917,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: mull %ebx ; X32-NEXT: movl 
%eax, -564(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %edx, -568(%ebp) # 4-byte Spill +; X32-NEXT: movl %ebx, -568(%ebp) # 4-byte Spill ; X32-NEXT: movl -500(%ebp), %edx # 4-byte Reload ; X32-NEXT: movl %edx, %edi ; X32-NEXT: addl %eax, %edi @@ -982,7 +983,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movzbl -88(%ebp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: addl %edx, %ebx ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %eax, -88(%ebp) # 4-byte Spill ; X32-NEXT: movl -28(%ebp), %edi # 4-byte Reload @@ -1037,7 +1038,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx -; X32-NEXT: movl %eax, %ecx +; X32-NEXT: movl %ebx, %ecx ; X32-NEXT: movl -396(%ebp), %esi # 4-byte Reload ; X32-NEXT: addl %esi, %ecx ; X32-NEXT: adcl $0, %edx @@ -1051,7 +1052,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movzbl -16(%ebp), %ebx # 1-byte Folded Reload ; X32-NEXT: adcl %edi, %ebx ; X32-NEXT: movl %eax, %esi -; X32-NEXT: addl %eax, %edx +; X32-NEXT: addl %esi, %edx ; X32-NEXT: adcl %ecx, %ebx ; X32-NEXT: movl -64(%ebp), %eax # 4-byte Reload ; X32-NEXT: addl -324(%ebp), %eax # 4-byte Folded Reload @@ -1142,7 +1143,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movzbl %cl, %eax ; X32-NEXT: adcl %esi, %eax ; X32-NEXT: movl %edi, %esi -; X32-NEXT: addl %edi, %edx +; X32-NEXT: addl %esi, %edx ; X32-NEXT: adcl %ebx, %eax ; X32-NEXT: movl %eax, -112(%ebp) # 4-byte Spill ; X32-NEXT: movl -136(%ebp), %edi # 4-byte Reload @@ -1222,7 +1223,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movzbl %bl, %eax ; X32-NEXT: adcl %edx, %eax ; X32-NEXT: movl %ecx, %edx -; X32-NEXT: addl %ecx, %esi +; X32-NEXT: addl %edx, %esi ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, -48(%ebp) # 4-byte Spill ; X32-NEXT: movl -100(%ebp), %edi # 4-byte Reload @@ -1696,7 +1697,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %ebx, %esi -; X32-NEXT: movl %ebx, -48(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -48(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, -64(%ebp) # 4-byte Spill @@ -4478,7 +4479,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movl %esi, %eax ; X32-NEXT: mull %ebx ; X32-NEXT: movl %ebx, %esi -; X32-NEXT: movl %ebx, -140(%ebp) # 4-byte Spill +; X32-NEXT: movl %esi, -140(%ebp) # 4-byte Spill ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, -56(%ebp) # 4-byte Spill @@ -5198,7 +5199,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: addl %edi, %edx ; X32-NEXT: movl 124(%ebx), %ebx ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: imull %ecx, %ebx +; X32-NEXT: imull %eax, %ebx ; X32-NEXT: addl %edx, %ebx ; X32-NEXT: movl -144(%ebp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, -96(%ebp) # 4-byte Folded Spill @@ -6072,8 +6073,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movl 108(%eax), %edx ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl %edx, %ebx -; X32-NEXT: movl %edx, -112(%ebp) # 4-byte Spill -; X32-NEXT: mull %edx +; X32-NEXT: movl %ebx, -112(%ebp) # 4-byte Spill +; X32-NEXT: mull %ebx ; 
X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ecx, %eax ; X32-NEXT: movl %eax, -128(%ebp) # 4-byte Spill @@ -6112,7 +6113,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X32-NEXT: movl -184(%ebp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl %ebx, %esi -; X32-NEXT: mull %ebx +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, -144(%ebp) # 4-byte Spill ; X32-NEXT: movl %eax, -280(%ebp) # 4-byte Spill ; X32-NEXT: movl -60(%ebp), %ebx # 4-byte Reload @@ -6753,6 +6754,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq $0, %rbp ; X64-NEXT: addq %rcx, %rbx ; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rcx, %r11 ; X64-NEXT: adcq %rdi, %rbp ; X64-NEXT: setb %bl ; X64-NEXT: movzbl %bl, %ebx @@ -6762,12 +6764,12 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rcx, %r12 -; X64-NEXT: movq %rcx, %r8 +; X64-NEXT: movq %r11, %r12 +; X64-NEXT: movq %r11, %r8 ; X64-NEXT: addq %rax, %r12 ; X64-NEXT: movq %rdi, %rax ; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: movq %rdi, (%rsp) # 8-byte Spill +; X64-NEXT: movq %r9, (%rsp) # 8-byte Spill ; X64-NEXT: adcq %rdx, %rax ; X64-NEXT: addq %rbp, %r12 ; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -6796,7 +6798,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %rdx, %rbx ; X64-NEXT: movq 16(%rsi), %rax ; X64-NEXT: movq %rsi, %r13 -; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r13, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill @@ -6809,7 +6811,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %rbx, %r11 ; X64-NEXT: movq %r8, %rax ; X64-NEXT: movq %r8, %rbp -; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: movq %r9, %rax ; X64-NEXT: adcq %rcx, %rax @@ -6822,7 +6824,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: movq %rdx, %rax +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: adcq %rcx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq 32(%r13), %rax @@ -6838,9 +6840,9 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %rdx, %rax ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rbp, %rax -; X64-NEXT: addq %rdi, %rax +; X64-NEXT: addq %r9, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r9, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: adcq %r15, %rax ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill @@ -6858,7 +6860,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: addq %rsi, %r11 ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: adcq $0, %rbp -; X64-NEXT: addq %rbx, %r11 +; X64-NEXT: addq %rcx, %r11 ; X64-NEXT: adcq %rsi, %rbp ; X64-NEXT: movq %rsi, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: setb %bl @@ -6879,11 +6881,11 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) 
nounwind { ; X64-NEXT: adcq %rbx, %r10 ; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: movq %rcx, %r12 -; X64-NEXT: movq %rcx, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r12, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: addq %r9, %rdx ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %r11, %r8 -; X64-NEXT: adcq %r11, %r15 +; X64-NEXT: adcq %r8, %r15 ; X64-NEXT: movq %r15, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq %rax, %r14 ; X64-NEXT: movq %r14, {{[0-9]+}}(%rsp) # 8-byte Spill @@ -6979,12 +6981,13 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %rdx, %r12 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %r10, %rbp +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rdi # 8-byte Reload ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rsi, %rbx @@ -7011,7 +7014,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: adcq $0, %r12 ; X64-NEXT: movq %r10, %rbx -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq %rbx, %rax ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r11 # 8-byte Reload ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rcx @@ -7028,7 +7031,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rbx, %rax ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rcx, %rbx -; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rbp, %r8 @@ -7059,7 +7062,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %r11, %rsi -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, %r13 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload @@ -7139,12 +7142,13 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %rdx, %r10 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %r11, %rbp +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r11 +; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rdx, %rbp ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rdi, %rbx @@ -7274,7 +7278,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %r14 ; X64-NEXT: movq %r8, %rbp -; X64-NEXT: movq %r8, %rax +; X64-NEXT: movq %rbp, %rax ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rcx, %r11 ; X64-NEXT: movq %rdx, %rbx @@ -7334,7 +7338,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq $0, %r9 ; X64-NEXT: adcq $0, %r10 ; X64-NEXT: movq %rbp, %rsi -; X64-NEXT: movq %rbp, %rax +; X64-NEXT: movq %rsi, %rax ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r14 @@ -7391,8 +7395,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq $0, %r15 ; X64-NEXT: movq %rbp, %rax ; X64-NEXT: movq %r8, %rdi -; 
X64-NEXT: movq %r8, {{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r8 +; X64-NEXT: movq %rdi, {{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: addq %rbx, %r8 @@ -7475,12 +7479,13 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rcx, %r14 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %r10, %rdi +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rsi # 8-byte Reload ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %r11, %rbx @@ -7508,7 +7513,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %r8, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: adcq $0, %r14 ; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r13, %rax +; X64-NEXT: movq %r13, %rbx +; X64-NEXT: movq %rbx, %rax ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r8 @@ -7521,7 +7527,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: addq %r8, %rcx ; X64-NEXT: adcq $0, %rsi -; X64-NEXT: movq %r13, %rax +; X64-NEXT: movq %rbx, %rax ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r13 # 8-byte Reload ; X64-NEXT: mulq %r13 ; X64-NEXT: movq %rdx, %rbx @@ -7555,12 +7561,13 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: setb -{{[0-9]+}}(%rsp) # 1-byte Folded Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbx # 8-byte Reload ; X64-NEXT: movq %rbx, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: movq %r10, %rsi +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 # 8-byte Reload ; X64-NEXT: movq %r8, %rax -; X64-NEXT: mulq %r10 +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %rcx, %rdi @@ -7636,7 +7643,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rcx, %r10 -; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %rdi ; X64-NEXT: addq %rsi, %rdi @@ -7648,16 +7655,16 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r12 # 8-byte Reload -; X64-NEXT: addq %rax, %r12 +; X64-NEXT: addq %rbx, %r12 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r15 # 8-byte Reload -; X64-NEXT: adcq %rdx, %r15 +; X64-NEXT: adcq %r14, %r15 ; X64-NEXT: addq %rdi, %r12 ; X64-NEXT: adcq %rcx, %r15 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rcx # 8-byte Reload ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %r11, %rsi -; X64-NEXT: movq %r11, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: mulq %r11 +; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, {{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq {{[0-9]+}}(%rsp), %r9 # 8-byte Reload @@ -7721,7 +7728,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rax, 
%r9 ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rbp # 8-byte Reload -; X64-NEXT: addq %rax, %rbp +; X64-NEXT: addq %r9, %rbp ; X64-NEXT: movq {{[0-9]+}}(%rsp), %rax # 8-byte Reload ; X64-NEXT: adcq %rdx, %rax ; X64-NEXT: addq %rsi, %rbp @@ -7899,7 +7906,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq 88(%rsi), %rax ; X64-NEXT: movq %rsi, %r9 ; X64-NEXT: movq %rax, %rsi -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rcx, %r11 ; X64-NEXT: movq %rdx, %rbp @@ -7935,12 +7942,13 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: adcq %r8, %r10 ; X64-NEXT: addq %rbx, %rsi ; X64-NEXT: adcq %rbp, %r10 -; X64-NEXT: movq 64(%r9), %r13 +; X64-NEXT: movq %r9, %rdi +; X64-NEXT: movq 64(%rdi), %r13 ; X64-NEXT: movq %r13, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, %rcx -; X64-NEXT: movq 72(%r9), %r9 +; X64-NEXT: movq 72(%rdi), %r9 ; X64-NEXT: movq %r9, %rax ; X64-NEXT: mulq %r11 ; X64-NEXT: movq %rdx, %rbp @@ -7968,8 +7976,8 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: movq %rdx, %r11 ; X64-NEXT: movq %rax, %r15 ; X64-NEXT: movq %r12, %rcx -; X64-NEXT: addq %rax, %rcx -; X64-NEXT: adcq %rdx, %r8 +; X64-NEXT: addq %r15, %rcx +; X64-NEXT: adcq %r11, %r8 ; X64-NEXT: addq %rbp, %rcx ; X64-NEXT: adcq %rbx, %r8 ; X64-NEXT: addq -{{[0-9]+}}(%rsp), %rcx # 8-byte Folded Reload @@ -8021,13 +8029,14 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: setb %r10b ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rsi # 8-byte Reload ; X64-NEXT: movq %rsi, %rax -; X64-NEXT: mulq %r8 +; X64-NEXT: movq %r8, %rdi +; X64-NEXT: mulq %rdi ; X64-NEXT: movq %rdx, %rcx ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rbp # 8-byte Reload ; X64-NEXT: movq %rbp, %rax -; X64-NEXT: mulq %r8 -; X64-NEXT: movq %r8, %r12 +; X64-NEXT: mulq %rdi +; X64-NEXT: movq %rdi, %r12 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %rbx ; X64-NEXT: addq %rcx, %rbx @@ -8066,7 +8075,7 @@ define void @test_1024(i1024* %a, i1024* %b, i1024* %out) nounwind { ; X64-NEXT: imulq %rcx, %rdi ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %r12, %rsi -; X64-NEXT: mulq %r12 +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rax, %r9 ; X64-NEXT: addq %rdi, %rdx ; X64-NEXT: movq 104(%rbp), %r8 diff --git a/llvm/test/CodeGen/X86/mul-i256.ll b/llvm/test/CodeGen/X86/mul-i256.ll index 105af640aaeb..c79685aecd08 100644 --- a/llvm/test/CodeGen/X86/mul-i256.ll +++ b/llvm/test/CodeGen/X86/mul-i256.ll @@ -44,7 +44,7 @@ define void @test(i256* %a, i256* %b, i256* %out) #0 { ; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %ecx ; X32-NEXT: movl %ecx, %edi -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %edx, %ecx ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill @@ -62,9 +62,9 @@ define void @test(i256* %a, i256* %b, i256* %out) #0 { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %edx ; X32-NEXT: movl %edx, %ebp -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %eax, %esi -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax # 4-byte Reload ; X32-NEXT: xorl %edx, %edx ; X32-NEXT: mull %edx @@ -127,7 +127,7 @@ define 
void @test(i256* %a, i256* %b, i256* %out) #0 { ; X32-NEXT: adcl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl 8(%eax), %ebx +; X32-NEXT: movl 8(%ecx), %ebx ; X32-NEXT: movl %esi, %eax ; X32-NEXT: movl %esi, %edi ; X32-NEXT: mull %ebx @@ -156,7 +156,7 @@ define void @test(i256* %a, i256* %b, i256* %out) #0 { ; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload ; X32-NEXT: adcl %eax, %esi ; X32-NEXT: movl %ebx, %edi -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: mull %ecx ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill diff --git a/llvm/test/CodeGen/X86/mul-i512.ll b/llvm/test/CodeGen/X86/mul-i512.ll index d83454856fee..d846729096e1 100644 --- a/llvm/test/CodeGen/X86/mul-i512.ll +++ b/llvm/test/CodeGen/X86/mul-i512.ll @@ -31,7 +31,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X32-NEXT: movl %edi, (%esp) # 4-byte Spill ; X32-NEXT: adcl %ecx, %ebx ; X32-NEXT: movl %ecx, %edi -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: setb %cl ; X32-NEXT: addl %eax, %ebx ; X32-NEXT: movzbl %cl, %ecx @@ -55,7 +55,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X32-NEXT: mull %ebx ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %edi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl 4(%ecx), %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %ecx, %esi @@ -92,13 +92,14 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X32-NEXT: adcl %edi, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl (%ecx), %eax +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl (%eax), %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: xorl %ebp, %ebp ; X32-NEXT: mull %ebp ; X32-NEXT: movl %edx, %ebx ; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %eax, %edx +; X32-NEXT: movl %ecx, %edx ; X32-NEXT: addl %esi, %edx ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %ebx, %eax @@ -112,7 +113,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %ecx, %edi ; X32-NEXT: movl %ecx, %ebp -; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: addl %eax, %edi ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: adcl %edx, %eax @@ -142,7 +143,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: movl %ebx, %esi -; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: setb %bl ; X32-NEXT: addl %eax, %ecx ; X32-NEXT: movzbl %bl, %ebx @@ -277,7 +278,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X32-NEXT: adcl %ebx, %ecx ; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill ; X32-NEXT: movl %edi, %ebp -; X32-NEXT: movl %edi, %eax +; X32-NEXT: movl %ebp, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, %ebx @@ -432,7 +433,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X32-NEXT: adcl %edi, %ecx ; X32-NEXT: setb {{[0-9]+}}(%esp) # 1-byte Folded Spill ; 
X32-NEXT: movl %ebx, %edi -; X32-NEXT: movl %ebx, %eax +; X32-NEXT: movl %edi, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %eax, %ebp ; X32-NEXT: addl %ecx, %ebp @@ -898,7 +899,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: mull %esi ; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl %edx, %esi ; X32-NEXT: addl %ebx, %eax ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill @@ -928,7 +929,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: movl %ecx, %eax ; X32-NEXT: movl %ebx, %esi -; X32-NEXT: mull %ebx +; X32-NEXT: mull %esi ; X32-NEXT: movl %edx, %edi ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx # 4-byte Reload @@ -1076,7 +1077,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X32-NEXT: addl %esi, %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload ; X32-NEXT: movl %edi, %eax -; X32-NEXT: imull %edi, %esi +; X32-NEXT: imull %eax, %esi ; X32-NEXT: addl %edx, %esi ; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx # 4-byte Folded Reload ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill @@ -1176,7 +1177,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi ; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl 40(%esi), %ebx +; X32-NEXT: movl 40(%ecx), %ebx ; X32-NEXT: movl %ebx, %eax ; X32-NEXT: movl %ebx, {{[0-9]+}}(%esp) # 4-byte Spill ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi # 4-byte Reload @@ -1373,7 +1374,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X32-NEXT: addl %edi, %edx ; X32-NEXT: movl 60(%ebx), %ebx ; X32-NEXT: movl %ecx, %eax -; X32-NEXT: imull %ecx, %ebx +; X32-NEXT: imull %eax, %ebx ; X32-NEXT: addl %edx, %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload ; X32-NEXT: addl %ecx, {{[0-9]+}}(%esp) # 4-byte Folded Spill @@ -1545,7 +1546,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X64-NEXT: movq 8(%rsi), %rbp ; X64-NEXT: movq %r15, %rax ; X64-NEXT: movq %rdx, %rsi -; X64-NEXT: mulq %rdx +; X64-NEXT: mulq %rsi ; X64-NEXT: movq %rdx, %r9 ; X64-NEXT: movq %rax, %r8 ; X64-NEXT: movq %r11, %rax @@ -1568,15 +1569,15 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X64-NEXT: movq %r11, %rax ; X64-NEXT: mulq %rbp ; X64-NEXT: movq %rbp, %r14 -; X64-NEXT: movq %rbp, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r14, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rdx, %rsi ; X64-NEXT: movq %rax, %rbp ; X64-NEXT: addq %rcx, %rbp ; X64-NEXT: adcq %rbx, %rsi ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: movq %r10, %rbx -; X64-NEXT: movq %r10, -{{[0-9]+}}(%rsp) # 8-byte Spill -; X64-NEXT: movq %r10, %rax +; X64-NEXT: movq %rbx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %rbx, %rax ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, %r13 ; X64-NEXT: movq %rax, %r10 @@ -1584,7 +1585,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rax, %r15 -; X64-NEXT: movq %rax, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r15, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: addq %r10, %r15 ; X64-NEXT: adcq %r13, %rdx ; X64-NEXT: addq %rbp, %r15 @@ -1623,8 +1624,8 @@ define 
void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X64-NEXT: mulq %rdx ; X64-NEXT: movq %rdx, %r14 ; X64-NEXT: movq %rax, %r11 -; X64-NEXT: addq %rax, %r10 -; X64-NEXT: adcq %rdx, %r13 +; X64-NEXT: addq %r11, %r10 +; X64-NEXT: adcq %r14, %r13 ; X64-NEXT: addq %rbp, %r10 ; X64-NEXT: adcq %rsi, %r13 ; X64-NEXT: addq %r8, %r10 @@ -1636,7 +1637,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X64-NEXT: movq 16(%rsi), %r8 ; X64-NEXT: movq %rcx, %rax ; X64-NEXT: movq %rcx, %r9 -; X64-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) # 8-byte Spill +; X64-NEXT: movq %r9, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: mulq %r8 ; X64-NEXT: movq %rdx, %rdi ; X64-NEXT: movq %rax, %r12 @@ -1667,7 +1668,7 @@ define void @test_512(i512* %a, i512* %b, i512* %out) nounwind { ; X64-NEXT: mulq %rcx ; X64-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) # 8-byte Spill ; X64-NEXT: movq %rax, %rbp -; X64-NEXT: addq %rax, %r11 +; X64-NEXT: addq %rbp, %r11 ; X64-NEXT: adcq %rdx, %r14 ; X64-NEXT: addq %r9, %r11 ; X64-NEXT: adcq %rbx, %r14 diff --git a/llvm/test/CodeGen/X86/mul128.ll b/llvm/test/CodeGen/X86/mul128.ll index 8ef1ba21546a..e851c3a3d5b3 100644 --- a/llvm/test/CodeGen/X86/mul128.ll +++ b/llvm/test/CodeGen/X86/mul128.ll @@ -8,7 +8,7 @@ define i128 @foo(i128 %t, i128 %u) { ; X64-NEXT: movq %rdx, %r8 ; X64-NEXT: imulq %rdi, %rcx ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: mulq %rdx +; X64-NEXT: mulq %r8 ; X64-NEXT: addq %rcx, %rdx ; X64-NEXT: imulq %r8, %rsi ; X64-NEXT: addq %rsi, %rdx diff --git a/llvm/test/CodeGen/X86/mulvi32.ll b/llvm/test/CodeGen/X86/mulvi32.ll index 3d1f577cd494..2997f3f8712b 100644 --- a/llvm/test/CodeGen/X86/mulvi32.ll +++ b/llvm/test/CodeGen/X86/mulvi32.ll @@ -234,7 +234,7 @@ define <4 x i64> @_mul4xi32toi64b(<4 x i32>, <4 x i32>) { ; SSE-LABEL: _mul4xi32toi64b: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm2 -; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3] ; SSE-NEXT: pmuludq %xmm1, %xmm2 ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] ; SSE-NEXT: pmuludq %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll index 94a27c115eaf..b7a728c18f6c 100644 --- a/llvm/test/CodeGen/X86/musttail-varargs.ll +++ b/llvm/test/CodeGen/X86/musttail-varargs.ll @@ -209,9 +209,9 @@ define void @f_thunk(i8* %this, ...) 
{ ; WINDOWS-NEXT: movq %r8, %rdi ; WINDOWS-NEXT: movq %rdx, %rbx ; WINDOWS-NEXT: movq %rcx, %rbp -; WINDOWS-NEXT: movq %r9, {{[0-9]+}}(%rsp) -; WINDOWS-NEXT: movq %r8, {{[0-9]+}}(%rsp) -; WINDOWS-NEXT: movq %rdx, {{[0-9]+}}(%rsp) +; WINDOWS-NEXT: movq %rsi, {{[0-9]+}}(%rsp) +; WINDOWS-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; WINDOWS-NEXT: movq %rbx, {{[0-9]+}}(%rsp) ; WINDOWS-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; WINDOWS-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; WINDOWS-NEXT: callq get_f diff --git a/llvm/test/CodeGen/X86/pmul.ll b/llvm/test/CodeGen/X86/pmul.ll index f6f466a0cbfb..9d82edf66c45 100644 --- a/llvm/test/CodeGen/X86/pmul.ll +++ b/llvm/test/CodeGen/X86/pmul.ll @@ -9,7 +9,7 @@ define <16 x i8> @mul_v16i8c(<16 x i8> %i) nounwind { ; SSE2-LABEL: mul_v16i8c: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm1 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [117,117,117,117,117,117,117,117] ; SSE2-NEXT: pmullw %xmm2, %xmm1 @@ -143,10 +143,10 @@ define <16 x i8> @mul_v16i8(<16 x i8> %i, <16 x i8> %j) nounwind { ; SSE2-LABEL: mul_v16i8: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm3 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm0[8],xmm3[9],xmm0[9],xmm3[10],xmm0[10],xmm3[11],xmm0[11],xmm3[12],xmm0[12],xmm3[13],xmm0[13],xmm3[14],xmm0[14],xmm3[15],xmm0[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm3 ; SSE2-NEXT: pmullw %xmm2, %xmm3 ; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] @@ -386,7 +386,7 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind { ; SSE2-LABEL: mul_v32i8c: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm2 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [117,117,117,117,117,117,117,117] ; SSE2-NEXT: pmullw %xmm3, %xmm2 @@ -398,7 +398,7 @@ define <32 x i8> @mul_v32i8c(<32 x i8> %i) nounwind { ; SSE2-NEXT: pand %xmm4, %xmm0 ; SSE2-NEXT: packuswb %xmm2, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm2 ; SSE2-NEXT: pmullw %xmm3, %xmm2 ; SSE2-NEXT: pand %xmm4, %xmm2 @@ -567,10 +567,10 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind { ; SSE2-LABEL: mul_v32i8: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movdqa %xmm2, %xmm4 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = 
xmm4[8],xmm2[8],xmm4[9],xmm2[9],xmm4[10],xmm2[10],xmm4[11],xmm2[11],xmm4[12],xmm2[12],xmm4[13],xmm2[13],xmm4[14],xmm2[14],xmm4[15],xmm2[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm4 ; SSE2-NEXT: movdqa %xmm0, %xmm5 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm5 ; SSE2-NEXT: pmullw %xmm4, %xmm5 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] @@ -583,10 +583,10 @@ define <32 x i8> @mul_v32i8(<32 x i8> %i, <32 x i8> %j) nounwind { ; SSE2-NEXT: pand %xmm4, %xmm0 ; SSE2-NEXT: packuswb %xmm5, %xmm0 ; SSE2-NEXT: movdqa %xmm3, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm3[8],xmm2[9],xmm3[9],xmm2[10],xmm3[10],xmm2[11],xmm3[11],xmm2[12],xmm3[12],xmm2[13],xmm3[13],xmm2[14],xmm3[14],xmm2[15],xmm3[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm2 ; SSE2-NEXT: movdqa %xmm1, %xmm5 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm1[8],xmm5[9],xmm1[9],xmm5[10],xmm1[10],xmm5[11],xmm1[11],xmm5[12],xmm1[12],xmm5[13],xmm1[13],xmm5[14],xmm1[14],xmm5[15],xmm1[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm5 ; SSE2-NEXT: pmullw %xmm2, %xmm5 ; SSE2-NEXT: pand %xmm4, %xmm5 @@ -774,7 +774,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind { ; SSE2-LABEL: mul_v64i8c: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movdqa %xmm0, %xmm6 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm0[8],xmm6[9],xmm0[9],xmm6[10],xmm0[10],xmm6[11],xmm0[11],xmm6[12],xmm0[12],xmm6[13],xmm0[13],xmm6[14],xmm0[14],xmm6[15],xmm0[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm6 ; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [117,117,117,117,117,117,117,117] ; SSE2-NEXT: pmullw %xmm4, %xmm6 @@ -786,7 +786,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind { ; SSE2-NEXT: pand %xmm5, %xmm0 ; SSE2-NEXT: packuswb %xmm6, %xmm0 ; SSE2-NEXT: movdqa %xmm1, %xmm6 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm1[8],xmm6[9],xmm1[9],xmm6[10],xmm1[10],xmm6[11],xmm1[11],xmm6[12],xmm1[12],xmm6[13],xmm1[13],xmm6[14],xmm1[14],xmm6[15],xmm1[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm6 ; SSE2-NEXT: pmullw %xmm4, %xmm6 ; SSE2-NEXT: pand %xmm5, %xmm6 @@ -796,7 +796,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind { ; SSE2-NEXT: pand %xmm5, %xmm1 ; SSE2-NEXT: packuswb %xmm6, %xmm1 ; SSE2-NEXT: movdqa %xmm2, %xmm6 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8],xmm2[8],xmm6[9],xmm2[9],xmm6[10],xmm2[10],xmm6[11],xmm2[11],xmm6[12],xmm2[12],xmm6[13],xmm2[13],xmm6[14],xmm2[14],xmm6[15],xmm2[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm6 ; SSE2-NEXT: pmullw %xmm4, %xmm6 ; SSE2-NEXT: pand %xmm5, %xmm6 @@ -806,7 +806,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind { ; SSE2-NEXT: pand %xmm5, %xmm2 ; SSE2-NEXT: packuswb %xmm6, %xmm2 ; SSE2-NEXT: movdqa %xmm3, %xmm6 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = 
xmm6[8],xmm3[8],xmm6[9],xmm3[9],xmm6[10],xmm3[10],xmm6[11],xmm3[11],xmm6[12],xmm3[12],xmm6[13],xmm3[13],xmm6[14],xmm3[14],xmm6[15],xmm3[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm6 = xmm6[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm6 ; SSE2-NEXT: pmullw %xmm4, %xmm6 ; SSE2-NEXT: pand %xmm5, %xmm6 @@ -821,7 +821,7 @@ define <64 x i8> @mul_v64i8c(<64 x i8> %i) nounwind { ; SSE41: # %bb.0: # %entry ; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 +; SSE41-NEXT: pmovsxbw %xmm1, %xmm0 ; SSE41-NEXT: movdqa {{.*#+}} xmm6 = [117,117,117,117,117,117,117,117] ; SSE41-NEXT: pmullw %xmm6, %xmm0 ; SSE41-NEXT: movdqa {{.*#+}} xmm7 = [255,255,255,255,255,255,255,255] @@ -939,10 +939,10 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind { ; SSE2-LABEL: mul_v64i8: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movdqa %xmm4, %xmm8 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm8 = xmm8[8],xmm4[8],xmm8[9],xmm4[9],xmm8[10],xmm4[10],xmm8[11],xmm4[11],xmm8[12],xmm4[12],xmm8[13],xmm4[13],xmm8[14],xmm4[14],xmm8[15],xmm4[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm8 = xmm8[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm8 ; SSE2-NEXT: movdqa %xmm0, %xmm9 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm9 = xmm9[8],xmm0[8],xmm9[9],xmm0[9],xmm9[10],xmm0[10],xmm9[11],xmm0[11],xmm9[12],xmm0[12],xmm9[13],xmm0[13],xmm9[14],xmm0[14],xmm9[15],xmm0[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm9 = xmm9[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm9 ; SSE2-NEXT: pmullw %xmm8, %xmm9 ; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [255,255,255,255,255,255,255,255] @@ -955,10 +955,10 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind { ; SSE2-NEXT: pand %xmm8, %xmm0 ; SSE2-NEXT: packuswb %xmm9, %xmm0 ; SSE2-NEXT: movdqa %xmm5, %xmm9 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm9 = xmm9[8],xmm5[8],xmm9[9],xmm5[9],xmm9[10],xmm5[10],xmm9[11],xmm5[11],xmm9[12],xmm5[12],xmm9[13],xmm5[13],xmm9[14],xmm5[14],xmm9[15],xmm5[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm9 = xmm9[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm9 ; SSE2-NEXT: movdqa %xmm1, %xmm4 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm1[8],xmm4[9],xmm1[9],xmm4[10],xmm1[10],xmm4[11],xmm1[11],xmm4[12],xmm1[12],xmm4[13],xmm1[13],xmm4[14],xmm1[14],xmm4[15],xmm1[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm4 ; SSE2-NEXT: pmullw %xmm9, %xmm4 ; SSE2-NEXT: pand %xmm8, %xmm4 @@ -970,10 +970,10 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind { ; SSE2-NEXT: pand %xmm8, %xmm1 ; SSE2-NEXT: packuswb %xmm4, %xmm1 ; SSE2-NEXT: movdqa %xmm6, %xmm4 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm6[8],xmm4[9],xmm6[9],xmm4[10],xmm6[10],xmm4[11],xmm6[11],xmm4[12],xmm6[12],xmm4[13],xmm6[13],xmm4[14],xmm6[14],xmm4[15],xmm6[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm4 ; SSE2-NEXT: movdqa %xmm2, %xmm5 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm2[8],xmm5[9],xmm2[9],xmm5[10],xmm2[10],xmm5[11],xmm2[11],xmm5[12],xmm2[12],xmm5[13],xmm2[13],xmm5[14],xmm2[14],xmm5[15],xmm2[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm5 ; SSE2-NEXT: pmullw %xmm4, %xmm5 ; SSE2-NEXT: pand %xmm8, %xmm5 @@ -985,10 +985,10 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind { ; SSE2-NEXT: pand %xmm8, %xmm2 ; SSE2-NEXT: 
packuswb %xmm5, %xmm2 ; SSE2-NEXT: movdqa %xmm7, %xmm4 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8],xmm7[8],xmm4[9],xmm7[9],xmm4[10],xmm7[10],xmm4[11],xmm7[11],xmm4[12],xmm7[12],xmm4[13],xmm7[13],xmm4[14],xmm7[14],xmm4[15],xmm7[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm4 = xmm4[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm4 ; SSE2-NEXT: movdqa %xmm3, %xmm5 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm3[8],xmm5[9],xmm3[9],xmm5[10],xmm3[10],xmm5[11],xmm3[11],xmm5[12],xmm3[12],xmm5[13],xmm3[13],xmm5[14],xmm3[14],xmm5[15],xmm3[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm5 ; SSE2-NEXT: pmullw %xmm4, %xmm5 ; SSE2-NEXT: pand %xmm8, %xmm5 @@ -1006,7 +1006,7 @@ define <64 x i8> @mul_v64i8(<64 x i8> %i, <64 x i8> %j) nounwind { ; SSE41-NEXT: movdqa %xmm1, %xmm8 ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: pmovsxbw %xmm4, %xmm9 -; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 +; SSE41-NEXT: pmovsxbw %xmm1, %xmm0 ; SSE41-NEXT: pmullw %xmm9, %xmm0 ; SSE41-NEXT: movdqa {{.*#+}} xmm9 = [255,255,255,255,255,255,255,255] ; SSE41-NEXT: pand %xmm9, %xmm0 @@ -1383,7 +1383,7 @@ define <8 x i64> @mul_v8i64_sext(<8 x i16> %val1, <8 x i32> %val2) { ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm1, %xmm4 ; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: punpckhwd {{.*#+}} xmm9 = xmm9[4],xmm0[4],xmm9[5],xmm0[5],xmm9[6],xmm0[6],xmm9[7],xmm0[7] +; SSE2-NEXT: punpckhwd {{.*#+}} xmm9 = xmm9[4],xmm1[4],xmm9[5],xmm1[5],xmm9[6],xmm1[6],xmm9[7],xmm1[7] ; SSE2-NEXT: movdqa %xmm9, %xmm0 ; SSE2-NEXT: psrad $31, %xmm0 ; SSE2-NEXT: psrad $16, %xmm9 diff --git a/llvm/test/CodeGen/X86/powi.ll b/llvm/test/CodeGen/X86/powi.ll index 3d5d89083ba6..246e853eed66 100644 --- a/llvm/test/CodeGen/X86/powi.ll +++ b/llvm/test/CodeGen/X86/powi.ll @@ -5,7 +5,7 @@ define double @pow_wrapper(double %a) nounwind readonly ssp noredzone { ; CHECK-LABEL: pow_wrapper: ; CHECK: # %bb.0: ; CHECK-NEXT: movapd %xmm0, %xmm1 -; CHECK-NEXT: mulsd %xmm0, %xmm1 +; CHECK-NEXT: mulsd %xmm1, %xmm1 ; CHECK-NEXT: mulsd %xmm1, %xmm0 ; CHECK-NEXT: mulsd %xmm1, %xmm1 ; CHECK-NEXT: mulsd %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/pr11334.ll b/llvm/test/CodeGen/X86/pr11334.ll index d0b965a12895..d5c0f10324fb 100644 --- a/llvm/test/CodeGen/X86/pr11334.ll +++ b/llvm/test/CodeGen/X86/pr11334.ll @@ -25,7 +25,7 @@ define <3 x double> @v3f2d_ext_vec(<3 x float> %v1) nounwind { ; SSE-NEXT: cvtps2pd %xmm0, %xmm0 ; SSE-NEXT: movlps %xmm0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: movaps %xmm2, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] ; SSE-NEXT: fldl -{{[0-9]+}}(%rsp) ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr29112.ll b/llvm/test/CodeGen/X86/pr29112.ll index 195ff521e942..f6bf76c1f853 100644 --- a/llvm/test/CodeGen/X86/pr29112.ll +++ b/llvm/test/CodeGen/X86/pr29112.ll @@ -49,13 +49,13 @@ define <4 x float> @bar(<4 x float>* %a1p, <4 x float>* %a2p, <4 x float> %a3, < ; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm12[0] ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm2 ; CHECK-NEXT: vmovaps %xmm15, %xmm1 -; CHECK-NEXT: vmovaps %xmm15, {{[0-9]+}}(%rsp) # 16-byte Spill -; CHECK-NEXT: vaddps %xmm0, %xmm15, %xmm9 +; CHECK-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm9 ; CHECK-NEXT: vaddps %xmm14, %xmm10, %xmm0 -; CHECK-NEXT: vaddps %xmm15, %xmm15, %xmm8 +; CHECK-NEXT: vaddps %xmm1, %xmm1, %xmm8 ; CHECK-NEXT: vaddps %xmm11, %xmm3, %xmm3 ; 
CHECK-NEXT: vaddps %xmm0, %xmm3, %xmm0 -; CHECK-NEXT: vaddps %xmm0, %xmm15, %xmm0 +; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0 ; CHECK-NEXT: vmovaps %xmm8, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovaps %xmm9, (%rsp) ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm3 # 16-byte Reload diff --git a/llvm/test/CodeGen/X86/pr34080-2.ll b/llvm/test/CodeGen/X86/pr34080-2.ll index 5cda5dadcb2b..5c00f0e3706b 100644 --- a/llvm/test/CodeGen/X86/pr34080-2.ll +++ b/llvm/test/CodeGen/X86/pr34080-2.ll @@ -23,7 +23,7 @@ define void @computeJD(%struct.DateTime*) nounwind { ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: imull %ecx ; CHECK-NEXT: movl %edx, %ecx -; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: shrl $31, %eax ; CHECK-NEXT: sarl $5, %ecx ; CHECK-NEXT: addl %eax, %ecx @@ -31,7 +31,7 @@ define void @computeJD(%struct.DateTime*) nounwind { ; CHECK-NEXT: movl %esi, %eax ; CHECK-NEXT: imull %edx ; CHECK-NEXT: movl %edx, %edi -; CHECK-NEXT: movl %edx, %eax +; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: shrl $31, %eax ; CHECK-NEXT: sarl $7, %edi ; CHECK-NEXT: addl %eax, %edi diff --git a/llvm/test/CodeGen/X86/required-vector-width.ll b/llvm/test/CodeGen/X86/required-vector-width.ll index f8ac85753012..e6b2f2f9585a 100644 --- a/llvm/test/CodeGen/X86/required-vector-width.ll +++ b/llvm/test/CodeGen/X86/required-vector-width.ll @@ -39,12 +39,12 @@ define void @add512(<16 x i32>* %a, <16 x i32>* %b, <16 x i32>* %c) "required-ve define void @avg_v64i8_256(<64 x i8>* %a, <64 x i8>* %b) "required-vector-width"="256" { ; CHECK-LABEL: avg_v64i8_256: ; CHECK: # %bb.0: -; CHECK-NEXT: vmovdqa (%rsi), %ymm0 -; CHECK-NEXT: vmovdqa 32(%rsi), %ymm1 -; CHECK-NEXT: vpavgb (%rdi), %ymm0, %ymm0 -; CHECK-NEXT: vpavgb 32(%rdi), %ymm1, %ymm1 -; CHECK-NEXT: vmovdqu %ymm1, (%rax) +; CHECK-NEXT: vmovdqa 32(%rdi), %ymm0 +; CHECK-NEXT: vmovdqa (%rsi), %ymm1 +; CHECK-NEXT: vpavgb (%rdi), %ymm1, %ymm1 +; CHECK-NEXT: vpavgb 32(%rsi), %ymm0, %ymm0 ; CHECK-NEXT: vmovdqu %ymm0, (%rax) +; CHECK-NEXT: vmovdqu %ymm1, (%rax) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: retq %1 = load <64 x i8>, <64 x i8>* %a diff --git a/llvm/test/CodeGen/X86/retpoline-external.ll b/llvm/test/CodeGen/X86/retpoline-external.ll index 04d6ecf816c2..2f21bb2566de 100644 --- a/llvm/test/CodeGen/X86/retpoline-external.ll +++ b/llvm/test/CodeGen/X86/retpoline-external.ll @@ -19,7 +19,7 @@ entry: ; X64-LABEL: icall_reg: ; X64-DAG: movq %rdi, %[[fp:[^ ]*]] ; X64-DAG: movl %esi, %[[x:[^ ]*]] -; X64: movl %esi, %edi +; X64: movl %[[x]], %edi ; X64: callq bar ; X64-DAG: movl %[[x]], %edi ; X64-DAG: movq %[[fp]], %r11 @@ -111,7 +111,7 @@ define void @vcall(%struct.Foo* %obj) #0 { ; X64-LABEL: vcall: ; X64: movq %rdi, %[[obj:[^ ]*]] -; X64: movq (%rdi), %[[vptr:[^ ]*]] +; X64: movq (%[[obj]]), %[[vptr:[^ ]*]] ; X64: movq 8(%[[vptr]]), %[[fp:[^ ]*]] ; X64: movq %[[fp]], %r11 ; X64: callq __x86_indirect_thunk_r11 diff --git a/llvm/test/CodeGen/X86/retpoline.ll b/llvm/test/CodeGen/X86/retpoline.ll index da12ae8cba62..477609e2d10b 100644 --- a/llvm/test/CodeGen/X86/retpoline.ll +++ b/llvm/test/CodeGen/X86/retpoline.ll @@ -19,7 +19,7 @@ entry: ; X64-LABEL: icall_reg: ; X64-DAG: movq %rdi, %[[fp:[^ ]*]] ; X64-DAG: movl %esi, %[[x:[^ ]*]] -; X64: movl %esi, %edi +; X64: movl %[[x]], %edi ; X64: callq bar ; X64-DAG: movl %[[x]], %edi ; X64-DAG: movq %[[fp]], %r11 @@ -111,7 +111,7 @@ define void @vcall(%struct.Foo* %obj) #0 { ; X64-LABEL: vcall: ; X64: movq %rdi, %[[obj:[^ ]*]] -; X64: movq (%rdi), %[[vptr:[^ ]*]] +; X64: movq (%[[obj]]), %[[vptr:[^ ]*]] ; X64: movq 
8(%[[vptr]]), %[[fp:[^ ]*]] ; X64: movq %[[fp]], %r11 ; X64: callq __llvm_retpoline_r11 diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll index 5d5b59babac1..382eba3d6524 100644 --- a/llvm/test/CodeGen/X86/sad.ll +++ b/llvm/test/CodeGen/X86/sad.ll @@ -653,7 +653,7 @@ define i32 @sad_avx64i8() nounwind { ; SSE2-NEXT: paddd %xmm7, %xmm0 ; SSE2-NEXT: movdqa %xmm0, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm13, %xmm1 -; SSE2-NEXT: movdqa %xmm13, %xmm0 +; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: psrad $31, %xmm0 ; SSE2-NEXT: paddd %xmm0, %xmm1 ; SSE2-NEXT: pxor %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/safestack.ll b/llvm/test/CodeGen/X86/safestack.ll index a032319016ed..a9d1b32cfcff 100644 --- a/llvm/test/CodeGen/X86/safestack.ll +++ b/llvm/test/CodeGen/X86/safestack.ll @@ -40,6 +40,6 @@ declare void @_Z7CapturePi(i32*) ; LINUX-I386-PA: calll __safestack_pointer_address ; LINUX-I386-PA: movl %eax, %[[A:.*]] -; LINUX-I386-PA: movl (%eax), %[[B:.*]] +; LINUX-I386-PA: movl (%[[A]]), %[[B:.*]] ; LINUX-I386-PA: leal -16(%[[B]]), %[[C:.*]] ; LINUX-I386-PA: movl %[[C]], (%[[A]]) diff --git a/llvm/test/CodeGen/X86/safestack_inline.ll b/llvm/test/CodeGen/X86/safestack_inline.ll index 0fc53556f62f..ed93c4985640 100644 --- a/llvm/test/CodeGen/X86/safestack_inline.ll +++ b/llvm/test/CodeGen/X86/safestack_inline.ll @@ -25,6 +25,6 @@ declare void @_Z7CapturePi(i32*) ; CALL: callq __safestack_pointer_address ; CALL: movq %rax, %[[A:.*]] -; CALL: movq (%rax), %[[B:.*]] +; CALL: movq (%[[A]]), %[[B:.*]] ; CALL: leaq -16(%[[B]]), %[[C:.*]] ; CALL: movq %[[C]], (%[[A]]) diff --git a/llvm/test/CodeGen/X86/scalar_widen_div.ll b/llvm/test/CodeGen/X86/scalar_widen_div.ll index 8940e9a15bd6..1f36db5b7424 100644 --- a/llvm/test/CodeGen/X86/scalar_widen_div.ll +++ b/llvm/test/CodeGen/X86/scalar_widen_div.ll @@ -11,7 +11,7 @@ define void @vectorDiv (<2 x i32> addrspace(1)* %nsource, <2 x i32> addrspace(1) ; CHECK-NEXT: movq %rdx, %r8 ; CHECK-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) -; CHECK-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; CHECK-NEXT: movq %r8, -{{[0-9]+}}(%rsp) ; CHECK-NEXT: movslq -{{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: pmovsxdq (%rdi,%rcx,8), %xmm0 ; CHECK-NEXT: pmovsxdq (%rsi,%rcx,8), %xmm1 @@ -403,7 +403,7 @@ define void @test_int_div(<3 x i32>* %dest, <3 x i32>* %old, i32 %n) { ; CHECK-LABEL: test_int_div: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %edx, %r9d -; CHECK-NEXT: testl %edx, %edx +; CHECK-NEXT: testl %r9d, %r9d ; CHECK-NEXT: jle .LBB12_3 ; CHECK-NEXT: # %bb.1: # %bb.nph ; CHECK-NEXT: xorl %ecx, %ecx diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll index 62cbf31fe62f..e1c0703ba17d 100644 --- a/llvm/test/CodeGen/X86/select.ll +++ b/llvm/test/CodeGen/X86/select.ll @@ -22,7 +22,8 @@ define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind { ; MCU-NEXT: jne .LBB0_1 ; MCU-NEXT: # %bb.2: ; MCU-NEXT: addl $8, %edx -; MCU-NEXT: movl (%edx), %eax +; MCU-NEXT: movl %edx, %eax +; MCU-NEXT: movl (%eax), %eax ; MCU-NEXT: retl ; MCU-NEXT: .LBB0_1: ; MCU-NEXT: addl $8, %eax diff --git a/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll b/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll index 1364732813ad..099ef137d8d9 100644 --- a/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll +++ b/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll @@ -61,7 +61,7 @@ false: ; CHECK-LABEL: @use_eax_before_prologue@8: # @use_eax_before_prologue ; CHECK: movl %ecx, %eax -; CHECK: cmpl %edx, %ecx +; CHECK: cmpl %edx, %eax ; CHECK: jge LBB1_2 ; 
CHECK: pushl %eax ; CHECK: movl $4092, %eax diff --git a/llvm/test/CodeGen/X86/slow-pmulld.ll b/llvm/test/CodeGen/X86/slow-pmulld.ll index 007531fca7df..f22d82817b40 100644 --- a/llvm/test/CodeGen/X86/slow-pmulld.ll +++ b/llvm/test/CodeGen/X86/slow-pmulld.ll @@ -614,7 +614,7 @@ define <16 x i32> @test_mul_v16i32_v16i16(<16 x i16> %A) { ; SLOW32-NEXT: movdqa %xmm1, %xmm3 ; SLOW32-NEXT: movdqa %xmm0, %xmm1 ; SLOW32-NEXT: movdqa {{.*#+}} xmm2 = [18778,18778,18778,18778,18778,18778,18778,18778] -; SLOW32-NEXT: movdqa %xmm0, %xmm4 +; SLOW32-NEXT: movdqa %xmm1, %xmm4 ; SLOW32-NEXT: pmulhuw %xmm2, %xmm4 ; SLOW32-NEXT: pmullw %xmm2, %xmm1 ; SLOW32-NEXT: movdqa %xmm1, %xmm0 @@ -633,7 +633,7 @@ define <16 x i32> @test_mul_v16i32_v16i16(<16 x i16> %A) { ; SLOW64-NEXT: movdqa %xmm1, %xmm3 ; SLOW64-NEXT: movdqa %xmm0, %xmm1 ; SLOW64-NEXT: movdqa {{.*#+}} xmm2 = [18778,18778,18778,18778,18778,18778,18778,18778] -; SLOW64-NEXT: movdqa %xmm0, %xmm4 +; SLOW64-NEXT: movdqa %xmm1, %xmm4 ; SLOW64-NEXT: pmulhuw %xmm2, %xmm4 ; SLOW64-NEXT: pmullw %xmm2, %xmm1 ; SLOW64-NEXT: movdqa %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll index 07495e159e13..288879febb15 100644 --- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll +++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll @@ -201,7 +201,7 @@ define float @f32_estimate(float %x) #1 { ; SSE: # %bb.0: ; SSE-NEXT: rsqrtss %xmm0, %xmm1 ; SSE-NEXT: movaps %xmm1, %xmm2 -; SSE-NEXT: mulss %xmm1, %xmm2 +; SSE-NEXT: mulss %xmm2, %xmm2 ; SSE-NEXT: mulss %xmm0, %xmm2 ; SSE-NEXT: addss {{.*}}(%rip), %xmm2 ; SSE-NEXT: mulss {{.*}}(%rip), %xmm1 @@ -247,7 +247,7 @@ define <4 x float> @v4f32_estimate(<4 x float> %x) #1 { ; SSE: # %bb.0: ; SSE-NEXT: rsqrtps %xmm0, %xmm1 ; SSE-NEXT: movaps %xmm1, %xmm2 -; SSE-NEXT: mulps %xmm1, %xmm2 +; SSE-NEXT: mulps %xmm2, %xmm2 ; SSE-NEXT: mulps %xmm0, %xmm2 ; SSE-NEXT: addps {{.*}}(%rip), %xmm2 ; SSE-NEXT: mulps {{.*}}(%rip), %xmm1 @@ -297,7 +297,7 @@ define <8 x float> @v8f32_estimate(<8 x float> %x) #1 { ; SSE-NEXT: rsqrtps %xmm0, %xmm3 ; SSE-NEXT: movaps {{.*#+}} xmm4 = [-5.000000e-01,-5.000000e-01,-5.000000e-01,-5.000000e-01] ; SSE-NEXT: movaps %xmm3, %xmm2 -; SSE-NEXT: mulps %xmm3, %xmm2 +; SSE-NEXT: mulps %xmm2, %xmm2 ; SSE-NEXT: mulps %xmm0, %xmm2 ; SSE-NEXT: movaps {{.*#+}} xmm0 = [-3.000000e+00,-3.000000e+00,-3.000000e+00,-3.000000e+00] ; SSE-NEXT: addps %xmm0, %xmm2 @@ -305,7 +305,7 @@ define <8 x float> @v8f32_estimate(<8 x float> %x) #1 { ; SSE-NEXT: mulps %xmm3, %xmm2 ; SSE-NEXT: rsqrtps %xmm1, %xmm5 ; SSE-NEXT: movaps %xmm5, %xmm3 -; SSE-NEXT: mulps %xmm5, %xmm3 +; SSE-NEXT: mulps %xmm3, %xmm3 ; SSE-NEXT: mulps %xmm1, %xmm3 ; SSE-NEXT: addps %xmm0, %xmm3 ; SSE-NEXT: mulps %xmm4, %xmm3 diff --git a/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll b/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll index 67174c5463e0..60e041b05abe 100644 --- a/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll +++ b/llvm/test/CodeGen/X86/sse-scalar-fp-arith.ll @@ -1084,7 +1084,8 @@ define <4 x float> @add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, ; SSE2-NEXT: testb $1, %dil ; SSE2-NEXT: jne .LBB62_1 ; SSE2-NEXT: # %bb.2: -; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] +; SSE2-NEXT: movaps %xmm2, %xmm1 +; SSE2-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; SSE2-NEXT: retq ; SSE2-NEXT: .LBB62_1: ; SSE2-NEXT: addss %xmm0, %xmm1 @@ -1096,7 +1097,8 @@ define <4 x float> @add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, ; SSE41-NEXT: testb $1, %dil ; SSE41-NEXT: jne .LBB62_1 ; SSE41-NEXT: # 
%bb.2: -; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3] +; SSE41-NEXT: movaps %xmm2, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] ; SSE41-NEXT: retq ; SSE41-NEXT: .LBB62_1: ; SSE41-NEXT: addss %xmm0, %xmm1 @@ -1137,7 +1139,8 @@ define <2 x double> @add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> ; SSE2-NEXT: testb $1, %dil ; SSE2-NEXT: jne .LBB63_1 ; SSE2-NEXT: # %bb.2: -; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm2[0],xmm0[1] +; SSE2-NEXT: movapd %xmm2, %xmm1 +; SSE2-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] ; SSE2-NEXT: retq ; SSE2-NEXT: .LBB63_1: ; SSE2-NEXT: addsd %xmm0, %xmm1 @@ -1149,7 +1152,8 @@ define <2 x double> @add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> ; SSE41-NEXT: testb $1, %dil ; SSE41-NEXT: jne .LBB63_1 ; SSE41-NEXT: # %bb.2: -; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm2[0,1],xmm0[2,3] +; SSE41-NEXT: movaps %xmm2, %xmm1 +; SSE41-NEXT: blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] ; SSE41-NEXT: retq ; SSE41-NEXT: .LBB63_1: ; SSE41-NEXT: addsd %xmm0, %xmm1 diff --git a/llvm/test/CodeGen/X86/sse1.ll b/llvm/test/CodeGen/X86/sse1.ll index b405b8aa2f50..7222a27c826b 100644 --- a/llvm/test/CodeGen/X86/sse1.ll +++ b/llvm/test/CodeGen/X86/sse1.ll @@ -16,7 +16,7 @@ define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind { ; X32-LABEL: test4: ; X32: # %bb.0: # %entry ; X32-NEXT: movaps %xmm0, %xmm2 -; X32-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] +; X32-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3] ; X32-NEXT: addss %xmm1, %xmm0 ; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; X32-NEXT: subss %xmm1, %xmm2 @@ -26,7 +26,7 @@ define <2 x float> @test4(<2 x float> %A, <2 x float> %B) nounwind { ; X64-LABEL: test4: ; X64: # %bb.0: # %entry ; X64-NEXT: movaps %xmm0, %xmm2 -; X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] +; X64-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3] ; X64-NEXT: addss %xmm1, %xmm0 ; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; X64-NEXT: subss %xmm1, %xmm2 diff --git a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll index 543b5ed41a34..aba916241f3a 100644 --- a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll +++ b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll @@ -406,9 +406,9 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) { ; SSE-NEXT: movaps %xmm0, %xmm2 ; SSE-NEXT: subss %xmm0, %xmm2 ; SSE-NEXT: movaps %xmm0, %xmm3 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] +; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] ; SSE-NEXT: movaps %xmm1, %xmm4 -; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm4[1,1] ; SSE-NEXT: subss %xmm4, %xmm3 ; SSE-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] ; SSE-NEXT: addss %xmm0, %xmm4 diff --git a/llvm/test/CodeGen/X86/statepoint-live-in.ll b/llvm/test/CodeGen/X86/statepoint-live-in.ll index 69affe2a9fcb..2c9b95916d8a 100644 --- a/llvm/test/CodeGen/X86/statepoint-live-in.ll +++ b/llvm/test/CodeGen/X86/statepoint-live-in.ll @@ -114,7 +114,7 @@ define void @test6(i32 %a) gc "statepoint-example" { ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset %rbx, -16 ; CHECK-NEXT: movl %edi, %ebx -; CHECK-NEXT: movl %edi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movl %ebx, {{[0-9]+}}(%rsp) ; CHECK-NEXT: callq _baz ; CHECK-NEXT: Ltmp6: ; CHECK-NEXT: callq _bar diff --git a/llvm/test/CodeGen/X86/statepoint-stack-usage.ll b/llvm/test/CodeGen/X86/statepoint-stack-usage.ll index 73b0d6a18071..6e7fc7bf1c07 100644 --- a/llvm/test/CodeGen/X86/statepoint-stack-usage.ll +++ 
b/llvm/test/CodeGen/X86/statepoint-stack-usage.ll @@ -61,9 +61,9 @@ define i32 @back_to_back_deopt(i32 %a, i32 %b, i32 %c) #1 gc "statepoint-example" { ; CHECK-LABEL: back_to_back_deopt ; The exact stores don't matter, but there need to be three stack slots created -; CHECK-DAG: movl %edi, 12(%rsp) -; CHECK-DAG: movl %esi, 8(%rsp) -; CHECK-DAG: movl %edx, 4(%rsp) +; CHECK-DAG: movl %ebx, 12(%rsp) +; CHECK-DAG: movl %ebp, 8(%rsp) +; CHECK-DAG: movl %r14d, 4(%rsp) ; CHECK: callq ; CHECK-DAG: movl %ebx, 12(%rsp) ; CHECK-DAG: movl %ebp, 8(%rsp) diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll index 0acd7f2292e4..5c835a172fdb 100644 --- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll +++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll @@ -1016,12 +1016,12 @@ define <4 x i64> @fptosi_4f32_to_4i64(<8 x float> %a) { ; SSE-NEXT: cvttss2si %xmm0, %rax ; SSE-NEXT: movq %rax, %xmm2 ; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; SSE-NEXT: cvttss2si %xmm1, %rax ; SSE-NEXT: movq %rax, %xmm1 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3] +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE-NEXT: cvttss2si %xmm1, %rax ; SSE-NEXT: movq %rax, %xmm3 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] @@ -1124,12 +1124,12 @@ define <4 x i64> @fptosi_8f32_to_4i64(<8 x float> %a) { ; SSE-NEXT: cvttss2si %xmm0, %rax ; SSE-NEXT: movq %rax, %xmm2 ; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1,2,3] ; SSE-NEXT: cvttss2si %xmm1, %rax ; SSE-NEXT: movq %rax, %xmm1 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3] +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE-NEXT: cvttss2si %xmm1, %rax ; SSE-NEXT: movq %rax, %xmm3 ; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] @@ -1314,11 +1314,11 @@ define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) { ; SSE-LABEL: fptoui_4f32_to_4i32: ; SSE: # %bb.0: ; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1],xmm0[2,3] +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE-NEXT: cvttss2si %xmm1, %rax ; SSE-NEXT: movd %eax, %xmm1 ; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] ; SSE-NEXT: cvttss2si %xmm2, %rax ; SSE-NEXT: movd %eax, %xmm2 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] @@ -1556,7 +1556,7 @@ define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) { ; SSE-NEXT: cvttss2si %xmm0, %rax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movaps %xmm2, %xmm3 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm2[1],xmm3[1] +; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] ; SSE-NEXT: cvttss2si %xmm3, %rax ; SSE-NEXT: movd %eax, %xmm3 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] @@ -1568,11 +1568,11 @@ define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) { ; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] ; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0] ; SSE-NEXT: movaps %xmm1, %xmm2 -; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1],xmm1[2,3] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE-NEXT: cvttss2si %xmm2, %rax ; SSE-NEXT: movd %eax, %xmm2 ; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] 
+; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm3[1,1] ; SSE-NEXT: cvttss2si %xmm3, %rax ; SSE-NEXT: movd %eax, %xmm3 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] @@ -1683,7 +1683,7 @@ define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) { ; SSE-NEXT: cmovaeq %rcx, %rdx ; SSE-NEXT: movq %rdx, %xmm2 ; SSE-NEXT: movaps %xmm0, %xmm3 -; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3] +; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3] ; SSE-NEXT: movaps %xmm3, %xmm4 ; SSE-NEXT: subss %xmm1, %xmm4 ; SSE-NEXT: cvttss2si %xmm4, %rcx @@ -1694,7 +1694,7 @@ define <4 x i64> @fptoui_4f32_to_4i64(<8 x float> %a) { ; SSE-NEXT: movq %rdx, %xmm3 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; SSE-NEXT: movaps %xmm0, %xmm3 -; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3] +; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE-NEXT: movaps %xmm3, %xmm4 ; SSE-NEXT: subss %xmm1, %xmm4 ; SSE-NEXT: cvttss2si %xmm4, %rcx @@ -1861,7 +1861,7 @@ define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) { ; SSE-NEXT: cmovaeq %rcx, %rdx ; SSE-NEXT: movq %rdx, %xmm2 ; SSE-NEXT: movaps %xmm0, %xmm3 -; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3] +; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1,2,3] ; SSE-NEXT: movaps %xmm3, %xmm4 ; SSE-NEXT: subss %xmm1, %xmm4 ; SSE-NEXT: cvttss2si %xmm4, %rcx @@ -1872,7 +1872,7 @@ define <4 x i64> @fptoui_8f32_to_4i64(<8 x float> %a) { ; SSE-NEXT: movq %rdx, %xmm3 ; SSE-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] ; SSE-NEXT: movaps %xmm0, %xmm3 -; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3] +; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE-NEXT: movaps %xmm3, %xmm4 ; SSE-NEXT: subss %xmm1, %xmm4 ; SSE-NEXT: cvttss2si %xmm4, %rcx diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll index cc158e773308..355fa68c76a7 100644 --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -1591,7 +1591,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; SSE-LABEL: uitofp_2i64_to_4f32: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm1 -; SSE-NEXT: movq %xmm0, %rax +; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB39_1 ; SSE-NEXT: # %bb.2: @@ -1819,7 +1819,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE-LABEL: uitofp_4i64_to_4f32_undef: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm1 -; SSE-NEXT: movq %xmm0, %rax +; SSE-NEXT: movq %xmm1, %rax ; SSE-NEXT: testq %rax, %rax ; SSE-NEXT: js .LBB41_1 ; SSE-NEXT: # %bb.2: diff --git a/llvm/test/CodeGen/X86/vec_minmax_uint.ll b/llvm/test/CodeGen/X86/vec_minmax_uint.ll index 0df27cb0a3d9..e1dcad984ac4 100644 --- a/llvm/test/CodeGen/X86/vec_minmax_uint.ll +++ b/llvm/test/CodeGen/X86/vec_minmax_uint.ll @@ -1006,7 +1006,7 @@ define <4 x i64> @min_lt_v4i64(<4 x i64> %a, <4 x i64> %b) { ; SSE42: # %bb.0: ; SSE42-NEXT: movdqa %xmm0, %xmm4 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808] -; SSE42-NEXT: movdqa %xmm0, %xmm6 +; SSE42-NEXT: movdqa %xmm4, %xmm6 ; SSE42-NEXT: pxor %xmm5, %xmm6 ; SSE42-NEXT: movdqa %xmm2, %xmm0 ; SSE42-NEXT: pxor %xmm5, %xmm0 @@ -1426,7 +1426,7 @@ define <4 x i64> @min_le_v4i64(<4 x i64> %a, <4 x i64> %b) { ; SSE42: # %bb.0: ; SSE42-NEXT: movdqa %xmm0, %xmm4 ; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808] -; SSE42-NEXT: movdqa %xmm0, %xmm6 +; SSE42-NEXT: movdqa %xmm4, %xmm6 ; SSE42-NEXT: pxor %xmm5, %xmm6 ; SSE42-NEXT: movdqa %xmm2, %xmm0 ; SSE42-NEXT: pxor %xmm5, %xmm0 diff --git 
a/llvm/test/CodeGen/X86/vec_shift4.ll b/llvm/test/CodeGen/X86/vec_shift4.ll index d54c9cd620b8..04b4cb658f15 100644 --- a/llvm/test/CodeGen/X86/vec_shift4.ll +++ b/llvm/test/CodeGen/X86/vec_shift4.ll @@ -35,7 +35,7 @@ define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp { ; X32: # %bb.0: # %entry ; X32-NEXT: movdqa %xmm0, %xmm2 ; X32-NEXT: psllw $5, %xmm1 -; X32-NEXT: movdqa %xmm0, %xmm3 +; X32-NEXT: movdqa %xmm2, %xmm3 ; X32-NEXT: psllw $4, %xmm3 ; X32-NEXT: pand {{\.LCPI.*}}, %xmm3 ; X32-NEXT: movdqa %xmm1, %xmm0 @@ -47,7 +47,7 @@ define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp { ; X32-NEXT: movdqa %xmm1, %xmm0 ; X32-NEXT: pblendvb %xmm0, %xmm3, %xmm2 ; X32-NEXT: movdqa %xmm2, %xmm3 -; X32-NEXT: paddb %xmm2, %xmm3 +; X32-NEXT: paddb %xmm3, %xmm3 ; X32-NEXT: paddb %xmm1, %xmm1 ; X32-NEXT: movdqa %xmm1, %xmm0 ; X32-NEXT: pblendvb %xmm0, %xmm3, %xmm2 @@ -58,7 +58,7 @@ define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp { ; X64: # %bb.0: # %entry ; X64-NEXT: movdqa %xmm0, %xmm2 ; X64-NEXT: psllw $5, %xmm1 -; X64-NEXT: movdqa %xmm0, %xmm3 +; X64-NEXT: movdqa %xmm2, %xmm3 ; X64-NEXT: psllw $4, %xmm3 ; X64-NEXT: pand {{.*}}(%rip), %xmm3 ; X64-NEXT: movdqa %xmm1, %xmm0 @@ -70,7 +70,7 @@ define <2 x i64> @shl2(<16 x i8> %r, <16 x i8> %a) nounwind readnone ssp { ; X64-NEXT: movdqa %xmm1, %xmm0 ; X64-NEXT: pblendvb %xmm0, %xmm3, %xmm2 ; X64-NEXT: movdqa %xmm2, %xmm3 -; X64-NEXT: paddb %xmm2, %xmm3 +; X64-NEXT: paddb %xmm3, %xmm3 ; X64-NEXT: paddb %xmm1, %xmm1 ; X64-NEXT: movdqa %xmm1, %xmm0 ; X64-NEXT: pblendvb %xmm0, %xmm3, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-blend.ll b/llvm/test/CodeGen/X86/vector-blend.ll index d2b31b48fa7f..524809487644 100644 --- a/llvm/test/CodeGen/X86/vector-blend.ll +++ b/llvm/test/CodeGen/X86/vector-blend.ll @@ -954,7 +954,7 @@ define <4 x i32> @blend_neg_logic_v4i32_2(<4 x i32> %v, <4 x i32> %c) { ; SSE41: # %bb.0: # %entry ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: pxor %xmm3, %xmm3 -; SSE41-NEXT: psubd %xmm0, %xmm3 +; SSE41-NEXT: psubd %xmm2, %xmm3 ; SSE41-NEXT: movaps %xmm1, %xmm0 ; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm3 ; SSE41-NEXT: movaps %xmm3, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll index 76005a037b97..61787fc19dfa 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-sdiv-128.ll @@ -177,13 +177,13 @@ define <16 x i8> @test_div7_16i8(<16 x i8> %a) nounwind { ; SSE2-LABEL: test_div7_16i8: ; SSE2: # %bb.0: ; SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm2 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [65427,65427,65427,65427,65427,65427,65427,65427] ; SSE2-NEXT: pmullw %xmm3, %xmm2 ; SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: psraw $8, %xmm1 ; SSE2-NEXT: pmullw %xmm3, %xmm1 ; SSE2-NEXT: psrlw $8, %xmm1 @@ -501,13 +501,13 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; SSE2-LABEL: test_rem7_16i8: ; SSE2: # %bb.0: ; 
SSE2-NEXT: movdqa %xmm0, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm0[8],xmm2[9],xmm0[9],xmm2[10],xmm0[10],xmm2[11],xmm0[11],xmm2[12],xmm0[12],xmm2[13],xmm0[13],xmm2[14],xmm0[14],xmm2[15],xmm0[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm2 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [65427,65427,65427,65427,65427,65427,65427,65427] ; SSE2-NEXT: pmullw %xmm3, %xmm2 ; SSE2-NEXT: psrlw $8, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] ; SSE2-NEXT: psraw $8, %xmm1 ; SSE2-NEXT: pmullw %xmm3, %xmm1 ; SSE2-NEXT: psrlw $8, %xmm1 @@ -523,7 +523,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE2-NEXT: paddb %xmm2, %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm2 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7] ; SSE2-NEXT: pmullw %xmm3, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll index c85128893dac..9788cc037d41 100644 --- a/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll +++ b/llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll @@ -497,7 +497,7 @@ define <16 x i8> @test_rem7_16i8(<16 x i8> %a) nounwind { ; SSE2-NEXT: psrlw $2, %xmm1 ; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movdqa %xmm1, %xmm2 -; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15] +; SSE2-NEXT: punpckhbw {{.*#+}} xmm2 = xmm2[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] ; SSE2-NEXT: psraw $8, %xmm2 ; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7] ; SSE2-NEXT: pmullw %xmm3, %xmm2 diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll index 811084ea892c..642da7c0137c 100644 --- a/llvm/test/CodeGen/X86/vector-mul.ll +++ b/llvm/test/CodeGen/X86/vector-mul.ll @@ -178,7 +178,7 @@ define <16 x i8> @mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8(<16 x i8> %a0) nounw ; X86-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8: ; X86: # %bb.0: ; X86-NEXT: movdqa %xmm0, %xmm1 -; X86-NEXT: movdqa %xmm0, %xmm2 +; X86-NEXT: movdqa %xmm1, %xmm2 ; X86-NEXT: psllw $4, %xmm2 ; X86-NEXT: pand {{\.LCPI.*}}, %xmm2 ; X86-NEXT: movdqa {{.*#+}} xmm0 = [8192,24640,8192,24640,8192,24640,8192,24640] @@ -189,7 +189,7 @@ define <16 x i8> @mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8(<16 x i8> %a0) nounw ; X86-NEXT: paddb %xmm0, %xmm0 ; X86-NEXT: pblendvb %xmm0, %xmm2, %xmm1 ; X86-NEXT: movdqa %xmm1, %xmm2 -; X86-NEXT: paddb %xmm1, %xmm2 +; X86-NEXT: paddb %xmm2, %xmm2 ; X86-NEXT: paddb %xmm0, %xmm0 ; X86-NEXT: pblendvb %xmm0, %xmm2, %xmm1 ; X86-NEXT: movdqa %xmm1, %xmm0 @@ -198,7 +198,7 @@ define <16 x i8> @mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8(<16 x i8> %a0) nounw ; X64-LABEL: mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8: ; X64: # %bb.0: ; X64-NEXT: movdqa %xmm0, %xmm1 -; X64-NEXT: movdqa %xmm0, %xmm2 +; X64-NEXT: movdqa %xmm1, %xmm2 ; X64-NEXT: psllw $4, %xmm2 ; X64-NEXT: pand 
{{.*}}(%rip), %xmm2 ; X64-NEXT: movdqa {{.*#+}} xmm0 = [8192,24640,8192,24640,8192,24640,8192,24640] @@ -209,7 +209,7 @@ define <16 x i8> @mul_v16i8_1_2_4_8_1_2_4_8_1_2_4_8_1_2_4_8(<16 x i8> %a0) nounw ; X64-NEXT: paddb %xmm0, %xmm0 ; X64-NEXT: pblendvb %xmm0, %xmm2, %xmm1 ; X64-NEXT: movdqa %xmm1, %xmm2 -; X64-NEXT: paddb %xmm1, %xmm2 +; X64-NEXT: paddb %xmm2, %xmm2 ; X64-NEXT: paddb %xmm0, %xmm0 ; X64-NEXT: pblendvb %xmm0, %xmm2, %xmm1 ; X64-NEXT: movdqa %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-rotate-128.ll b/llvm/test/CodeGen/X86/vector-rotate-128.ll index 44c0c7eded45..0b0f94946927 100644 --- a/llvm/test/CodeGen/X86/vector-rotate-128.ll +++ b/llvm/test/CodeGen/X86/vector-rotate-128.ll @@ -359,7 +359,7 @@ define <8 x i16> @var_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; SSE41-NEXT: psllw $4, %xmm1 ; SSE41-NEXT: por %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm4 -; SSE41-NEXT: paddw %xmm1, %xmm4 +; SSE41-NEXT: paddw %xmm4, %xmm4 ; SSE41-NEXT: movdqa %xmm3, %xmm6 ; SSE41-NEXT: psllw $8, %xmm6 ; SSE41-NEXT: movdqa %xmm3, %xmm5 @@ -384,7 +384,7 @@ define <8 x i16> @var_rotate_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; SSE41-NEXT: psllw $4, %xmm2 ; SSE41-NEXT: por %xmm0, %xmm2 ; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: paddw %xmm2, %xmm1 +; SSE41-NEXT: paddw %xmm1, %xmm1 ; SSE41-NEXT: movdqa %xmm3, %xmm4 ; SSE41-NEXT: psrlw $8, %xmm4 ; SSE41-NEXT: movdqa %xmm2, %xmm0 @@ -629,10 +629,10 @@ define <16 x i8> @var_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; SSE41-NEXT: movdqa {{.*#+}} xmm2 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; SSE41-NEXT: psubb %xmm3, %xmm2 ; SSE41-NEXT: psllw $5, %xmm3 -; SSE41-NEXT: movdqa %xmm0, %xmm5 +; SSE41-NEXT: movdqa %xmm1, %xmm5 ; SSE41-NEXT: psllw $4, %xmm5 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm5 -; SSE41-NEXT: movdqa %xmm0, %xmm4 +; SSE41-NEXT: movdqa %xmm1, %xmm4 ; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm5, %xmm4 ; SSE41-NEXT: movdqa %xmm4, %xmm5 @@ -642,13 +642,13 @@ define <16 x i8> @var_rotate_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm5, %xmm4 ; SSE41-NEXT: movdqa %xmm4, %xmm5 -; SSE41-NEXT: paddb %xmm4, %xmm5 +; SSE41-NEXT: paddb %xmm5, %xmm5 ; SSE41-NEXT: paddb %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm5, %xmm4 ; SSE41-NEXT: psllw $5, %xmm2 ; SSE41-NEXT: movdqa %xmm2, %xmm3 -; SSE41-NEXT: paddb %xmm2, %xmm3 +; SSE41-NEXT: paddb %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm1, %xmm5 ; SSE41-NEXT: psrlw $4, %xmm5 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm5 @@ -1202,7 +1202,7 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; SSE41-LABEL: constant_rotate_v16i8: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm1, %xmm3 ; SSE41-NEXT: psllw $4, %xmm3 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8192,24640,41088,57536,57600,41152,24704,8256] @@ -1214,7 +1214,7 @@ define <16 x i8> @constant_rotate_v16i8(<16 x i8> %a) nounwind { ; SSE41-NEXT: paddb %xmm0, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2 ; SSE41-NEXT: movdqa %xmm2, %xmm3 -; SSE41-NEXT: paddb %xmm2, %xmm3 +; SSE41-NEXT: paddb %xmm3, %xmm3 ; SSE41-NEXT: paddb %xmm0, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2 ; SSE41-NEXT: movdqa %xmm1, %xmm3 diff --git a/llvm/test/CodeGen/X86/vector-sext.ll b/llvm/test/CodeGen/X86/vector-sext.ll index 591c521ec81a..8d803b6cf7dd 100644 --- a/llvm/test/CodeGen/X86/vector-sext.ll +++ 
b/llvm/test/CodeGen/X86/vector-sext.ll @@ -243,7 +243,7 @@ define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp ; SSSE3-LABEL: sext_16i8_to_8i32: ; SSSE3: # %bb.0: # %entry ; SSSE3-NEXT: movdqa %xmm0, %xmm1 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSSE3-NEXT: psrad $24, %xmm0 ; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,4,u,u,u,5,u,u,u,6,u,u,u,7] @@ -312,7 +312,7 @@ define <16 x i32> @sext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ss ; SSSE3-LABEL: sext_16i8_to_16i32: ; SSSE3: # %bb.0: # %entry ; SSSE3-NEXT: movdqa %xmm0, %xmm3 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSSE3-NEXT: psrad $24, %xmm0 ; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm3[8],xmm1[9],xmm3[9],xmm1[10],xmm3[10],xmm1[11],xmm3[11],xmm1[12],xmm3[12],xmm1[13],xmm3[13],xmm1[14],xmm3[14],xmm1[15],xmm3[15] @@ -443,7 +443,7 @@ define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp ; SSSE3-LABEL: sext_16i8_to_4i64: ; SSSE3: # %bb.0: # %entry ; SSSE3-NEXT: movdqa %xmm0, %xmm1 -; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSSE3-NEXT: movdqa %xmm0, %xmm2 ; SSSE3-NEXT: psrad $31, %xmm2 @@ -499,7 +499,7 @@ define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp ; SSE2-LABEL: sext_16i8_to_8i64: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movdqa %xmm0, %xmm1 -; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] ; SSE2-NEXT: movdqa %xmm0, %xmm2 ; SSE2-NEXT: psrad $31, %xmm2 @@ -1108,7 +1108,7 @@ define <8 x i64> @sext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp ; SSE2-NEXT: movdqa %xmm1, %xmm2 ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: psrad $31, %xmm3 -; SSE2-NEXT: movdqa %xmm1, %xmm4 +; SSE2-NEXT: movdqa %xmm2, %xmm4 ; SSE2-NEXT: psrad $31, %xmm4 ; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] @@ -1127,7 +1127,7 @@ define <8 x i64> @sext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp ; SSSE3-NEXT: movdqa %xmm1, %xmm2 ; SSSE3-NEXT: movdqa %xmm0, %xmm3 ; SSSE3-NEXT: psrad $31, %xmm3 -; SSSE3-NEXT: movdqa %xmm1, %xmm4 +; SSSE3-NEXT: movdqa %xmm2, %xmm4 ; SSSE3-NEXT: psrad $31, %xmm4 ; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] diff --git a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll index 82a42c94a845..30616c479496 100644 --- a/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll +++ 
b/llvm/test/CodeGen/X86/vector-shift-ashr-128.ll @@ -273,7 +273,7 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; SSE41-NEXT: psllw $4, %xmm1 ; SSE41-NEXT: por %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm3 -; SSE41-NEXT: paddw %xmm1, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm2, %xmm4 ; SSE41-NEXT: psraw $8, %xmm4 ; SSE41-NEXT: movdqa %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll index 8d1760e1b3f5..fc0aa84c9936 100644 --- a/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-lshr-128.ll @@ -243,7 +243,7 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; SSE41-NEXT: psllw $4, %xmm1 ; SSE41-NEXT: por %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm3 -; SSE41-NEXT: paddw %xmm1, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm2, %xmm4 ; SSE41-NEXT: psrlw $8, %xmm4 ; SSE41-NEXT: movdqa %xmm1, %xmm0 @@ -408,7 +408,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: psllw $5, %xmm1 -; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm2, %xmm3 ; SSE41-NEXT: psrlw $4, %xmm3 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3 ; SSE41-NEXT: movdqa %xmm1, %xmm0 @@ -701,7 +701,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; SSE41-NEXT: pshufb %xmm0, %xmm1 ; SSE41-NEXT: psllw $5, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm3 -; SSE41-NEXT: paddb %xmm1, %xmm3 +; SSE41-NEXT: paddb %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm2, %xmm4 ; SSE41-NEXT: psrlw $4, %xmm4 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm4 @@ -1147,7 +1147,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE41-LABEL: constant_shift_v16i8: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psrlw $4, %xmm2 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8192,24640,41088,57536,49376,32928,16480,32] diff --git a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll index 4446c6d6a15f..e7526fd541e7 100644 --- a/llvm/test/CodeGen/X86/vector-shift-shl-128.ll +++ b/llvm/test/CodeGen/X86/vector-shift-shl-128.ll @@ -200,7 +200,7 @@ define <8 x i16> @var_shift_v8i16(<8 x i16> %a, <8 x i16> %b) nounwind { ; SSE41-NEXT: psllw $4, %xmm1 ; SSE41-NEXT: por %xmm0, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm3 -; SSE41-NEXT: paddw %xmm1, %xmm3 +; SSE41-NEXT: paddw %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm2, %xmm4 ; SSE41-NEXT: psllw $8, %xmm4 ; SSE41-NEXT: movdqa %xmm1, %xmm0 @@ -362,7 +362,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm2 ; SSE41-NEXT: psllw $5, %xmm1 -; SSE41-NEXT: movdqa %xmm0, %xmm3 +; SSE41-NEXT: movdqa %xmm2, %xmm3 ; SSE41-NEXT: psllw $4, %xmm3 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm3 ; SSE41-NEXT: movdqa %xmm1, %xmm0 @@ -374,7 +374,7 @@ define <16 x i8> @var_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2 ; SSE41-NEXT: movdqa %xmm2, %xmm3 -; SSE41-NEXT: paddb %xmm2, %xmm3 +; SSE41-NEXT: paddb %xmm3, %xmm3 ; SSE41-NEXT: paddb %xmm1, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm3, %xmm2 @@ -649,7 +649,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind 
{ ; SSE41-NEXT: pshufb %xmm0, %xmm1 ; SSE41-NEXT: psllw $5, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm3 -; SSE41-NEXT: paddb %xmm1, %xmm3 +; SSE41-NEXT: paddb %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm2, %xmm4 ; SSE41-NEXT: psllw $4, %xmm4 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm4 @@ -661,7 +661,7 @@ define <16 x i8> @splatvar_shift_v16i8(<16 x i8> %a, <16 x i8> %b) nounwind { ; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 ; SSE41-NEXT: movdqa %xmm2, %xmm1 -; SSE41-NEXT: paddb %xmm2, %xmm1 +; SSE41-NEXT: paddb %xmm1, %xmm1 ; SSE41-NEXT: paddb %xmm3, %xmm3 ; SSE41-NEXT: movdqa %xmm3, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm1, %xmm2 @@ -1001,7 +1001,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE41-LABEL: constant_shift_v16i8: ; SSE41: # %bb.0: ; SSE41-NEXT: movdqa %xmm0, %xmm1 -; SSE41-NEXT: movdqa %xmm0, %xmm2 +; SSE41-NEXT: movdqa %xmm1, %xmm2 ; SSE41-NEXT: psllw $4, %xmm2 ; SSE41-NEXT: pand {{.*}}(%rip), %xmm2 ; SSE41-NEXT: movdqa {{.*#+}} xmm0 = [8192,24640,41088,57536,49376,32928,16480,32] @@ -1012,7 +1012,7 @@ define <16 x i8> @constant_shift_v16i8(<16 x i8> %a) nounwind { ; SSE41-NEXT: paddb %xmm0, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm2 -; SSE41-NEXT: paddb %xmm1, %xmm2 +; SSE41-NEXT: paddb %xmm2, %xmm2 ; SSE41-NEXT: paddb %xmm0, %xmm0 ; SSE41-NEXT: pblendvb %xmm0, %xmm2, %xmm1 ; SSE41-NEXT: movdqa %xmm1, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll index 3ab3a08d5e90..8ce581a40cfe 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll @@ -2703,7 +2703,7 @@ define <4 x float> @PR22377(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: PR22377: ; SSE: # %bb.0: # %entry ; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3],xmm0[1,3] +; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,3,1,3] ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2,0,2] ; SSE-NEXT: addps %xmm0, %xmm1 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] diff --git a/llvm/test/CodeGen/X86/vector-trunc-math.ll b/llvm/test/CodeGen/X86/vector-trunc-math.ll index f0a5449585c4..80629a388b48 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-math.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-math.ll @@ -5511,7 +5511,7 @@ define <4 x i32> @mul_add_const_v4i64_v4i32(<4 x i32> %a0, <4 x i32> %a1) nounwi ; SSE-LABEL: mul_add_const_v4i64_v4i32: ; SSE: # %bb.0: ; SSE-NEXT: movdqa %xmm0, %xmm2 -; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3] +; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] ; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,1,3,3] ; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm1[0,1,1,3] ; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll index 155b23ca0a99..0e7f9139919a 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll @@ -2302,7 +2302,7 @@ define <16 x i8> @trunc_packus_v16i64_v16i8(<16 x i64> %a0) { ; SSE2-NEXT: pandn %xmm13, %xmm7 ; SSE2-NEXT: por %xmm5, %xmm7 ; SSE2-NEXT: movdqa %xmm7, %xmm10 -; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm4, %xmm5 ; SSE2-NEXT: pxor %xmm9, %xmm5 ; SSE2-NEXT: movdqa %xmm11, %xmm6 @@ -2317,7 +2317,7 @@ define <16 x i8> @trunc_packus_v16i64_v16i8(<16 x i64> %a0) { ; SSE2-NEXT: pandn %xmm13, 
%xmm6 ; SSE2-NEXT: por %xmm4, %xmm6 ; SSE2-NEXT: movdqa %xmm6, %xmm7 -; SSE2-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm3, %xmm4 ; SSE2-NEXT: pxor %xmm9, %xmm4 ; SSE2-NEXT: movdqa %xmm11, %xmm5 @@ -2332,7 +2332,7 @@ define <16 x i8> @trunc_packus_v16i64_v16i8(<16 x i64> %a0) { ; SSE2-NEXT: pandn %xmm13, %xmm5 ; SSE2-NEXT: por %xmm3, %xmm5 ; SSE2-NEXT: movdqa %xmm5, %xmm8 -; SSE2-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSE2-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSE2-NEXT: movdqa %xmm2, %xmm3 ; SSE2-NEXT: pxor %xmm9, %xmm3 ; SSE2-NEXT: movdqa %xmm11, %xmm4 @@ -2527,7 +2527,7 @@ define <16 x i8> @trunc_packus_v16i64_v16i8(<16 x i64> %a0) { ; SSSE3-NEXT: pandn %xmm13, %xmm7 ; SSSE3-NEXT: por %xmm5, %xmm7 ; SSSE3-NEXT: movdqa %xmm7, %xmm10 -; SSSE3-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSSE3-NEXT: movdqa %xmm10, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSSE3-NEXT: movdqa %xmm4, %xmm5 ; SSSE3-NEXT: pxor %xmm9, %xmm5 ; SSSE3-NEXT: movdqa %xmm11, %xmm6 @@ -2542,7 +2542,7 @@ define <16 x i8> @trunc_packus_v16i64_v16i8(<16 x i64> %a0) { ; SSSE3-NEXT: pandn %xmm13, %xmm6 ; SSSE3-NEXT: por %xmm4, %xmm6 ; SSSE3-NEXT: movdqa %xmm6, %xmm7 -; SSSE3-NEXT: movdqa %xmm6, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSSE3-NEXT: movdqa %xmm7, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSSE3-NEXT: movdqa %xmm3, %xmm4 ; SSSE3-NEXT: pxor %xmm9, %xmm4 ; SSSE3-NEXT: movdqa %xmm11, %xmm5 @@ -2557,7 +2557,7 @@ define <16 x i8> @trunc_packus_v16i64_v16i8(<16 x i64> %a0) { ; SSSE3-NEXT: pandn %xmm13, %xmm5 ; SSSE3-NEXT: por %xmm3, %xmm5 ; SSSE3-NEXT: movdqa %xmm5, %xmm8 -; SSSE3-NEXT: movdqa %xmm5, -{{[0-9]+}}(%rsp) # 16-byte Spill +; SSSE3-NEXT: movdqa %xmm8, -{{[0-9]+}}(%rsp) # 16-byte Spill ; SSSE3-NEXT: movdqa %xmm2, %xmm3 ; SSSE3-NEXT: pxor %xmm9, %xmm3 ; SSSE3-NEXT: movdqa %xmm11, %xmm4 diff --git a/llvm/test/CodeGen/X86/vector-zext.ll b/llvm/test/CodeGen/X86/vector-zext.ll index b3c9fbe58f0a..2f3819cc93c7 100644 --- a/llvm/test/CodeGen/X86/vector-zext.ll +++ b/llvm/test/CodeGen/X86/vector-zext.ll @@ -247,7 +247,7 @@ define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ss ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: pxor %xmm4, %xmm4 -; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm3, %xmm1 ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] @@ -262,7 +262,7 @@ define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ss ; SSSE3: # %bb.0: # %entry ; SSSE3-NEXT: movdqa %xmm0, %xmm3 ; SSSE3-NEXT: pxor %xmm4, %xmm4 -; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: movdqa %xmm3, %xmm1 ; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] @@ -400,7 +400,7 @@ define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: pxor %xmm4, %xmm4 -; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3] +; SSE2-NEXT: pshufd {{.*#+}} xmm3 = 
xmm1[1,1,2,3] ; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] ; SSE2-NEXT: movdqa %xmm1, %xmm0 @@ -701,7 +701,7 @@ define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movdqa %xmm0, %xmm3 ; SSE2-NEXT: pxor %xmm4, %xmm4 -; SSE2-NEXT: movdqa %xmm0, %xmm1 +; SSE2-NEXT: movdqa %xmm3, %xmm1 ; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] ; SSE2-NEXT: movdqa %xmm1, %xmm0 ; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] @@ -716,7 +716,7 @@ define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp ; SSSE3: # %bb.0: # %entry ; SSSE3-NEXT: movdqa %xmm0, %xmm3 ; SSSE3-NEXT: pxor %xmm4, %xmm4 -; SSSE3-NEXT: movdqa %xmm0, %xmm1 +; SSSE3-NEXT: movdqa %xmm3, %xmm1 ; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] ; SSSE3-NEXT: movdqa %xmm1, %xmm0 ; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] @@ -1583,7 +1583,7 @@ define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ; SSE41: # %bb.0: # %entry ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero ; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] ; SSE41-NEXT: retq ; @@ -1631,7 +1631,7 @@ define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ; SSE41: # %bb.0: # %entry ; SSE41-NEXT: movdqa %xmm0, %xmm1 ; SSE41-NEXT: pxor %xmm2, %xmm2 -; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero +; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero ; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] ; SSE41-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vselect-minmax.ll b/llvm/test/CodeGen/X86/vselect-minmax.ll index 600ae3f73485..28f94aafdd7e 100644 --- a/llvm/test/CodeGen/X86/vselect-minmax.ll +++ b/llvm/test/CodeGen/X86/vselect-minmax.ll @@ -5015,7 +5015,7 @@ define <8 x i64> @test125(<8 x i64> %a, <8 x i64> %b) { ; SSE4: # %bb.0: # %entry ; SSE4-NEXT: movdqa %xmm0, %xmm9 ; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm0, %xmm10 +; SSE4-NEXT: movdqa %xmm9, %xmm10 ; SSE4-NEXT: pxor %xmm8, %xmm10 ; SSE4-NEXT: movdqa %xmm4, %xmm0 ; SSE4-NEXT: pxor %xmm8, %xmm0 @@ -5162,7 +5162,7 @@ define <8 x i64> @test126(<8 x i64> %a, <8 x i64> %b) { ; SSE4: # %bb.0: # %entry ; SSE4-NEXT: movdqa %xmm0, %xmm9 ; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808] -; SSE4-NEXT: movdqa %xmm0, %xmm10 +; SSE4-NEXT: movdqa %xmm9, %xmm10 ; SSE4-NEXT: pxor %xmm8, %xmm10 ; SSE4-NEXT: movdqa %xmm4, %xmm0 ; SSE4-NEXT: pxor %xmm8, %xmm0 @@ -7271,10 +7271,10 @@ entry: define <8 x i64> @test156(<8 x i64> %a, <8 x i64> %b) { ; SSE2-LABEL: test156: ; SSE2: # %bb.0: # %entry -; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,0,2147483648,0] -; SSE2-NEXT: movdqa %xmm0, %xmm9 +; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,2147483648,2147483648,2147483648] +; SSE2-NEXT: movdqa %xmm4, %xmm9 ; SSE2-NEXT: pxor %xmm8, %xmm9 
-; SSE2-NEXT: movdqa %xmm4, %xmm10
+; SSE2-NEXT: movdqa %xmm0, %xmm10
 ; SSE2-NEXT: pxor %xmm8, %xmm10
 ; SSE2-NEXT: movdqa %xmm10, %xmm11
 ; SSE2-NEXT: pcmpgtd %xmm9, %xmm11
@@ -7287,9 +7287,9 @@ define <8 x i64> @test156(<8 x i64> %a, <8 x i64> %b) {
 ; SSE2-NEXT: pand %xmm10, %xmm0
 ; SSE2-NEXT: pandn %xmm4, %xmm10
 ; SSE2-NEXT: por %xmm10, %xmm0
-; SSE2-NEXT: movdqa %xmm1, %xmm9
+; SSE2-NEXT: movdqa %xmm5, %xmm9
 ; SSE2-NEXT: pxor %xmm8, %xmm9
-; SSE2-NEXT: movdqa %xmm5, %xmm4
+; SSE2-NEXT: movdqa %xmm1, %xmm4
 ; SSE2-NEXT: pxor %xmm8, %xmm4
 ; SSE2-NEXT: movdqa %xmm4, %xmm10
 ; SSE2-NEXT: pcmpgtd %xmm9, %xmm10
@@ -7302,9 +7302,9 @@ define <8 x i64> @test156(<8 x i64> %a, <8 x i64> %b) {
 ; SSE2-NEXT: pand %xmm4, %xmm1
 ; SSE2-NEXT: pandn %xmm5, %xmm4
 ; SSE2-NEXT: por %xmm4, %xmm1
-; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: movdqa %xmm6, %xmm4
 ; SSE2-NEXT: pxor %xmm8, %xmm4
-; SSE2-NEXT: movdqa %xmm6, %xmm5
+; SSE2-NEXT: movdqa %xmm2, %xmm5
 ; SSE2-NEXT: pxor %xmm8, %xmm5
 ; SSE2-NEXT: movdqa %xmm5, %xmm9
 ; SSE2-NEXT: pcmpgtd %xmm4, %xmm9
@@ -7317,9 +7317,9 @@ define <8 x i64> @test156(<8 x i64> %a, <8 x i64> %b) {
 ; SSE2-NEXT: pand %xmm5, %xmm2
 ; SSE2-NEXT: pandn %xmm6, %xmm5
 ; SSE2-NEXT: por %xmm5, %xmm2
-; SSE2-NEXT: movdqa %xmm3, %xmm4
+; SSE2-NEXT: movdqa %xmm7, %xmm4
 ; SSE2-NEXT: pxor %xmm8, %xmm4
-; SSE2-NEXT: pxor %xmm7, %xmm8
+; SSE2-NEXT: pxor %xmm3, %xmm8
 ; SSE2-NEXT: movdqa %xmm8, %xmm5
 ; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
 ; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
@@ -7335,18 +7335,30 @@ define <8 x i64> @test156(<8 x i64> %a, <8 x i64> %b) {
 ;
 ; SSE4-LABEL: test156:
 ; SSE4: # %bb.0: # %entry
-; SSE4-NEXT: movdqa %xmm0, %xmm8
-; SSE4-NEXT: movdqa %xmm4, %xmm0
-; SSE4-NEXT: pcmpgtq %xmm8, %xmm0
-; SSE4-NEXT: blendvpd %xmm0, %xmm8, %xmm4
-; SSE4-NEXT: movdqa %xmm5, %xmm0
-; SSE4-NEXT: pcmpgtq %xmm1, %xmm0
+; SSE4-NEXT: movdqa %xmm0, %xmm9
+; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808]
+; SSE4-NEXT: movdqa %xmm4, %xmm10
+; SSE4-NEXT: pxor %xmm8, %xmm10
+; SSE4-NEXT: pxor %xmm8, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm10, %xmm0
+; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4
+; SSE4-NEXT: movdqa %xmm5, %xmm9
+; SSE4-NEXT: pxor %xmm8, %xmm9
+; SSE4-NEXT: movdqa %xmm1, %xmm0
+; SSE4-NEXT: pxor %xmm8, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm9, %xmm0
 ; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5
-; SSE4-NEXT: movdqa %xmm6, %xmm0
-; SSE4-NEXT: pcmpgtq %xmm2, %xmm0
+; SSE4-NEXT: movdqa %xmm6, %xmm1
+; SSE4-NEXT: pxor %xmm8, %xmm1
+; SSE4-NEXT: movdqa %xmm2, %xmm0
+; SSE4-NEXT: pxor %xmm8, %xmm0
+; SSE4-NEXT: pcmpgtq %xmm1, %xmm0
 ; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6
 ; SSE4-NEXT: movdqa %xmm7, %xmm0
-; SSE4-NEXT: pcmpgtq %xmm3, %xmm0
+; SSE4-NEXT: pxor %xmm8, %xmm0
+; SSE4-NEXT: pxor %xmm3, %xmm8
+; SSE4-NEXT: pcmpgtq %xmm0, %xmm8
+; SSE4-NEXT: movdqa %xmm8, %xmm0
 ; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7
 ; SSE4-NEXT: movapd %xmm4, %xmm0
 ; SSE4-NEXT: movapd %xmm5, %xmm1
@@ -7356,322 +7368,44 @@ define <8 x i64> @test156(<8 x i64> %a, <8 x i64> %b) {
 ;
 ; AVX1-LABEL: test156:
 ; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm0, %xmm2, %xmm5
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm4
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
+; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm7
+; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
 ; AVX1-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
+; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
 ; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpcmpgtq %xmm1, %xmm3, %xmm4
+; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm4
+; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm5
+; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
 ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2
 ; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm3, %ymm1
 ; AVX1-NEXT: retq
 ;
 ; AVX2-LABEL: test156:
 ; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpcmpgtq %ymm0, %ymm2, %ymm4
-; AVX2-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: vpcmpgtq %ymm1, %ymm3, %ymm2
+; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
+; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm5
+; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm6
+; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5
+; AVX2-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm2
+; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm4
+; AVX2-NEXT: vpcmpgtq %ymm2, %ymm4, %ymm2
 ; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm3, %ymm1
 ; AVX2-NEXT: retq
 ;
 ; AVX512F-LABEL: test156:
 ; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: retq
-entry:
- %cmp = icmp sge <8 x i64> %a, %b
- %sel = select <8 x i1> %cmp, <8 x i64> %b, <8 x i64> %a
- ret <8 x i64> %sel
-}
-
-define <8 x i64> @test157(<8 x i64> %a, <8 x i64> %b) {
-; SSE2-LABEL: test157:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT: movdqa %xmm4, %xmm9
-; SSE2-NEXT: pxor %xmm8, %xmm9
-; SSE2-NEXT: movdqa %xmm0, %xmm10
-; SSE2-NEXT: pxor %xmm8, %xmm10
-; SSE2-NEXT: movdqa %xmm10, %xmm11
-; SSE2-NEXT: pcmpgtd %xmm9, %xmm11
-; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm9, %xmm10
-; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm10[1,1,3,3]
-; SSE2-NEXT: pand %xmm12, %xmm9
-; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm11[1,1,3,3]
-; SSE2-NEXT: por %xmm9, %xmm10
-; SSE2-NEXT: pand %xmm10, %xmm0
-; SSE2-NEXT: pandn %xmm4, %xmm10
-; SSE2-NEXT: por %xmm10, %xmm0
-; SSE2-NEXT: movdqa %xmm5, %xmm9
-; SSE2-NEXT: pxor %xmm8, %xmm9
-; SSE2-NEXT: movdqa %xmm1, %xmm4
-; SSE2-NEXT: pxor %xmm8, %xmm4
-; SSE2-NEXT: movdqa %xmm4, %xmm10
-; SSE2-NEXT: pcmpgtd %xmm9, %xmm10
-; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm9, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm4[1,1,3,3]
-; SSE2-NEXT: pand %xmm11, %xmm9
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm10[1,1,3,3]
-; SSE2-NEXT: por %xmm9, %xmm4
-; SSE2-NEXT: pand %xmm4, %xmm1
-; SSE2-NEXT: pandn %xmm5, %xmm4
-; SSE2-NEXT: por %xmm4, %xmm1
-; SSE2-NEXT: movdqa %xmm6, %xmm4
-; SSE2-NEXT: pxor %xmm8, %xmm4
-; SSE2-NEXT: movdqa %xmm2, %xmm5
-; SSE2-NEXT: pxor %xmm8, %xmm5
-; SSE2-NEXT: movdqa %xmm5, %xmm9
-; SSE2-NEXT: pcmpgtd %xmm4, %xmm9
-; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
-; SSE2-NEXT: pand %xmm10, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm9[1,1,3,3]
-; SSE2-NEXT: por %xmm4, %xmm5
-; SSE2-NEXT: pand %xmm5, %xmm2
-; SSE2-NEXT: pandn %xmm6, %xmm5
-; SSE2-NEXT: por %xmm5, %xmm2
-; SSE2-NEXT: movdqa %xmm7, %xmm4
-; SSE2-NEXT: pxor %xmm8, %xmm4
-; SSE2-NEXT: pxor %xmm3, %xmm8
-; SSE2-NEXT: movdqa %xmm8, %xmm5
-; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm8
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm8[1,1,3,3]
-; SSE2-NEXT: pand %xmm6, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSE2-NEXT: por %xmm4, %xmm5
-; SSE2-NEXT: pand %xmm5, %xmm3
-; SSE2-NEXT: pandn %xmm7, %xmm5
-; SSE2-NEXT: por %xmm5, %xmm3
-; SSE2-NEXT: retq
-;
-; SSE4-LABEL: test157:
-; SSE4: # %bb.0: # %entry
-; SSE4-NEXT: movdqa %xmm0, %xmm9
-; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808]
-; SSE4-NEXT: movdqa %xmm4, %xmm10
-; SSE4-NEXT: pxor %xmm8, %xmm10
-; SSE4-NEXT: pxor %xmm8, %xmm0
-; SSE4-NEXT: pcmpgtq %xmm10, %xmm0
-; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4
-; SSE4-NEXT: movdqa %xmm5, %xmm9
-; SSE4-NEXT: pxor %xmm8, %xmm9
-; SSE4-NEXT: movdqa %xmm1, %xmm0
-; SSE4-NEXT: pxor %xmm8, %xmm0
-; SSE4-NEXT: pcmpgtq %xmm9, %xmm0
-; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5
-; SSE4-NEXT: movdqa %xmm6, %xmm1
-; SSE4-NEXT: pxor %xmm8, %xmm1
-; SSE4-NEXT: movdqa %xmm2, %xmm0
-; SSE4-NEXT: pxor %xmm8, %xmm0
-; SSE4-NEXT: pcmpgtq %xmm1, %xmm0
-; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6
-; SSE4-NEXT: movdqa %xmm7, %xmm0
-; SSE4-NEXT: pxor %xmm8, %xmm0
-; SSE4-NEXT: pxor %xmm3, %xmm8
-; SSE4-NEXT: pcmpgtq %xmm0, %xmm8
-; SSE4-NEXT: movdqa %xmm8, %xmm0
-; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7
-; SSE4-NEXT: movapd %xmm4, %xmm0
-; SSE4-NEXT: movapd %xmm5, %xmm1
-; SSE4-NEXT: movapd %xmm6, %xmm2
-; SSE4-NEXT: movapd %xmm7, %xmm3
-; SSE4-NEXT: retq
-;
-; AVX1-LABEL: test157:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm6
-; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm7
-; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
-; AVX1-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
-; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm4
-; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm5
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2
-; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm3, %ymm1
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test157:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm5
-; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm6
-; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5
-; AVX2-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm2
-; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm4
-; AVX2-NEXT: vpcmpgtq %ymm2, %ymm4, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm3, %ymm1
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: test157:
-; AVX512F: # %bb.0: # %entry
-; AVX512F-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: retq
-entry:
- %cmp = icmp ult <8 x i64> %a, %b
- %sel = select <8 x i1> %cmp, <8 x i64> %b, <8 x i64> %a
- ret <8 x i64> %sel
-}
-
-define <8 x i64> @test158(<8 x i64> %a, <8 x i64> %b) {
-; SSE2-LABEL: test158:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movdqa {{.*#+}} xmm8 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT: movdqa %xmm4, %xmm9
-; SSE2-NEXT: pxor %xmm8, %xmm9
-; SSE2-NEXT: movdqa %xmm0, %xmm10
-; SSE2-NEXT: pxor %xmm8, %xmm10
-; SSE2-NEXT: movdqa %xmm10, %xmm11
-; SSE2-NEXT: pcmpgtd %xmm9, %xmm11
-; SSE2-NEXT: pshufd {{.*#+}} xmm12 = xmm11[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm9, %xmm10
-; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm10[1,1,3,3]
-; SSE2-NEXT: pand %xmm12, %xmm9
-; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm11[1,1,3,3]
-; SSE2-NEXT: por %xmm9, %xmm10
-; SSE2-NEXT: pand %xmm10, %xmm0
-; SSE2-NEXT: pandn %xmm4, %xmm10
-; SSE2-NEXT: por %xmm10, %xmm0
-; SSE2-NEXT: movdqa %xmm5, %xmm9
-; SSE2-NEXT: pxor %xmm8, %xmm9
-; SSE2-NEXT: movdqa %xmm1, %xmm4
-; SSE2-NEXT: pxor %xmm8, %xmm4
-; SSE2-NEXT: movdqa %xmm4, %xmm10
-; SSE2-NEXT: pcmpgtd %xmm9, %xmm10
-; SSE2-NEXT: pshufd {{.*#+}} xmm11 = xmm10[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm9, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm9 = xmm4[1,1,3,3]
-; SSE2-NEXT: pand %xmm11, %xmm9
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm10[1,1,3,3]
-; SSE2-NEXT: por %xmm9, %xmm4
-; SSE2-NEXT: pand %xmm4, %xmm1
-; SSE2-NEXT: pandn %xmm5, %xmm4
-; SSE2-NEXT: por %xmm4, %xmm1
-; SSE2-NEXT: movdqa %xmm6, %xmm4
-; SSE2-NEXT: pxor %xmm8, %xmm4
-; SSE2-NEXT: movdqa %xmm2, %xmm5
-; SSE2-NEXT: pxor %xmm8, %xmm5
-; SSE2-NEXT: movdqa %xmm5, %xmm9
-; SSE2-NEXT: pcmpgtd %xmm4, %xmm9
-; SSE2-NEXT: pshufd {{.*#+}} xmm10 = xmm9[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm5[1,1,3,3]
-; SSE2-NEXT: pand %xmm10, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm9[1,1,3,3]
-; SSE2-NEXT: por %xmm4, %xmm5
-; SSE2-NEXT: pand %xmm5, %xmm2
-; SSE2-NEXT: pandn %xmm6, %xmm5
-; SSE2-NEXT: por %xmm5, %xmm2
-; SSE2-NEXT: movdqa %xmm7, %xmm4
-; SSE2-NEXT: pxor %xmm8, %xmm4
-; SSE2-NEXT: pxor %xmm3, %xmm8
-; SSE2-NEXT: movdqa %xmm8, %xmm5
-; SSE2-NEXT: pcmpgtd %xmm4, %xmm5
-; SSE2-NEXT: pshufd {{.*#+}} xmm6 = xmm5[0,0,2,2]
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm8
-; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm8[1,1,3,3]
-; SSE2-NEXT: pand %xmm6, %xmm4
-; SSE2-NEXT: pshufd {{.*#+}} xmm5 = xmm5[1,1,3,3]
-; SSE2-NEXT: por %xmm4, %xmm5
-; SSE2-NEXT: pand %xmm5, %xmm3
-; SSE2-NEXT: pandn %xmm7, %xmm5
-; SSE2-NEXT: por %xmm5, %xmm3
-; SSE2-NEXT: retq
-;
-; SSE4-LABEL: test158:
-; SSE4: # %bb.0: # %entry
-; SSE4-NEXT: movdqa %xmm0, %xmm9
-; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808]
-; SSE4-NEXT: movdqa %xmm4, %xmm10
-; SSE4-NEXT: pxor %xmm8, %xmm10
-; SSE4-NEXT: pxor %xmm8, %xmm0
-; SSE4-NEXT: pcmpgtq %xmm10, %xmm0
-; SSE4-NEXT: blendvpd %xmm0, %xmm9, %xmm4
-; SSE4-NEXT: movdqa %xmm5, %xmm9
-; SSE4-NEXT: pxor %xmm8, %xmm9
-; SSE4-NEXT: movdqa %xmm1, %xmm0
-; SSE4-NEXT: pxor %xmm8, %xmm0
-; SSE4-NEXT: pcmpgtq %xmm9, %xmm0
-; SSE4-NEXT: blendvpd %xmm0, %xmm1, %xmm5
-; SSE4-NEXT: movdqa %xmm6, %xmm1
-; SSE4-NEXT: pxor %xmm8, %xmm1
-; SSE4-NEXT: movdqa %xmm2, %xmm0
-; SSE4-NEXT: pxor %xmm8, %xmm0
-; SSE4-NEXT: pcmpgtq %xmm1, %xmm0
-; SSE4-NEXT: blendvpd %xmm0, %xmm2, %xmm6
-; SSE4-NEXT: movdqa %xmm7, %xmm0
-; SSE4-NEXT: pxor %xmm8, %xmm0
-; SSE4-NEXT: pxor %xmm3, %xmm8
-; SSE4-NEXT: pcmpgtq %xmm0, %xmm8
-; SSE4-NEXT: movdqa %xmm8, %xmm0
-; SSE4-NEXT: blendvpd %xmm0, %xmm3, %xmm7
-; SSE4-NEXT: movapd %xmm4, %xmm0
-; SSE4-NEXT: movapd %xmm5, %xmm1
-; SSE4-NEXT: movapd %xmm6, %xmm2
-; SSE4-NEXT: movapd %xmm7, %xmm3
-; SSE4-NEXT: retq
-;
-; AVX1-LABEL: test158:
-; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm6
-; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm6, %xmm4
-; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm6
-; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm7
-; AVX1-NEXT: vpcmpgtq %xmm6, %xmm7, %xmm6
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm6, %ymm4
-; AVX1-NEXT: vblendvpd %ymm4, %ymm0, %ymm2, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm2
-; AVX1-NEXT: vpxor %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpcmpgtq %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm4
-; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm5
-; AVX1-NEXT: vpcmpgtq %xmm4, %xmm5, %xmm4
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm4, %ymm2
-; AVX1-NEXT: vblendvpd %ymm2, %ymm1, %ymm3, %ymm1
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: test158:
-; AVX2: # %bb.0: # %entry
-; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm4 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808]
-; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm5
-; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm6
-; AVX2-NEXT: vpcmpgtq %ymm5, %ymm6, %ymm5
-; AVX2-NEXT: vblendvpd %ymm5, %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm2
-; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm4
-; AVX2-NEXT: vpcmpgtq %ymm2, %ymm4, %ymm2
-; AVX2-NEXT: vblendvpd %ymm2, %ymm1, %ymm3, %ymm1
-; AVX2-NEXT: retq
-;
-; AVX512F-LABEL: test158:
-; AVX512F: # %bb.0: # %entry
 ; AVX512F-NEXT: vpmaxuq %zmm1, %zmm0, %zmm0
 ; AVX512F-NEXT: retq
 entry:
@@ -7749,7 +7483,7 @@ define <8 x i64> @test159(<8 x i64> %a, <8 x i64> %b) {
 ; SSE4: # %bb.0: # %entry
 ; SSE4-NEXT: movdqa %xmm0, %xmm9
 ; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808]
-; SSE4-NEXT: movdqa %xmm0, %xmm10
+; SSE4-NEXT: movdqa %xmm9, %xmm10
 ; SSE4-NEXT: pxor %xmm8, %xmm10
 ; SSE4-NEXT: movdqa %xmm4, %xmm0
 ; SSE4-NEXT: pxor %xmm8, %xmm0
@@ -7896,7 +7630,7 @@ define <8 x i64> @test160(<8 x i64> %a, <8 x i64> %b) {
 ; SSE4: # %bb.0: # %entry
 ; SSE4-NEXT: movdqa %xmm0, %xmm9
 ; SSE4-NEXT: movdqa {{.*#+}} xmm8 = [9223372036854775808,9223372036854775808]
-; SSE4-NEXT: movdqa %xmm0, %xmm10
+; SSE4-NEXT: movdqa %xmm9, %xmm10
 ; SSE4-NEXT: pxor %xmm8, %xmm10
 ; SSE4-NEXT: movdqa %xmm4, %xmm0
 ; SSE4-NEXT: pxor %xmm8, %xmm0
@@ -8307,7 +8041,7 @@ define <4 x i64> @test165(<4 x i64> %a, <4 x i64> %b) {
 ; SSE4: # %bb.0: # %entry
 ; SSE4-NEXT: movdqa %xmm0, %xmm4
 ; SSE4-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; SSE4-NEXT: movdqa %xmm0, %xmm6
+; SSE4-NEXT: movdqa %xmm4, %xmm6
 ; SSE4-NEXT: pxor %xmm5, %xmm6
 ; SSE4-NEXT: movdqa %xmm2, %xmm0
 ; SSE4-NEXT: pxor %xmm5, %xmm0
@@ -8396,7 +8130,7 @@ define <4 x i64> @test166(<4 x i64> %a, <4 x i64> %b) {
 ; SSE4: # %bb.0: # %entry
 ; SSE4-NEXT: movdqa %xmm0, %xmm4
 ; SSE4-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; SSE4-NEXT: movdqa %xmm0, %xmm6
+; SSE4-NEXT: movdqa %xmm4, %xmm6
 ; SSE4-NEXT: pxor %xmm5, %xmm6
 ; SSE4-NEXT: movdqa %xmm2, %xmm0
 ; SSE4-NEXT: pxor %xmm5, %xmm0
@@ -9131,7 +8865,7 @@ define <4 x i64> @test175(<4 x i64> %a, <4 x i64> %b) {
 ; SSE4: # %bb.0: # %entry
 ; SSE4-NEXT: movdqa %xmm0, %xmm4
 ; SSE4-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; SSE4-NEXT: movdqa %xmm0, %xmm6
+; SSE4-NEXT: movdqa %xmm4, %xmm6
 ; SSE4-NEXT: pxor %xmm5, %xmm6
 ; SSE4-NEXT: movdqa %xmm2, %xmm0
 ; SSE4-NEXT: pxor %xmm5, %xmm0
@@ -9220,7 +8954,7 @@ define <4 x i64> @test176(<4 x i64> %a, <4 x i64> %b) {
 ; SSE4: # %bb.0: # %entry
 ; SSE4-NEXT: movdqa %xmm0, %xmm4
 ; SSE4-NEXT: movdqa {{.*#+}} xmm5 = [9223372036854775808,9223372036854775808]
-; SSE4-NEXT: movdqa %xmm0, %xmm6
+; SSE4-NEXT: movdqa %xmm4, %xmm6
 ; SSE4-NEXT: pxor %xmm5, %xmm6
 ; SSE4-NEXT: movdqa %xmm2, %xmm0
 ; SSE4-NEXT: pxor %xmm5, %xmm0
diff --git a/llvm/test/CodeGen/X86/widen_conv-3.ll b/llvm/test/CodeGen/X86/widen_conv-3.ll
index e1d27f18e61c..186e43e213b6 100644
--- a/llvm/test/CodeGen/X86/widen_conv-3.ll
+++ b/llvm/test/CodeGen/X86/widen_conv-3.ll
@@ -74,7 +74,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
 ; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
 ; X86-SSE2-NEXT: movss %xmm0, (%eax)
 ; X86-SSE2-NEXT: movaps %xmm0, %xmm1
-; X86-SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; X86-SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
 ; X86-SSE2-NEXT: movss %xmm1, 8(%eax)
 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; X86-SSE2-NEXT: movss %xmm0, 4(%eax)
diff --git a/llvm/test/CodeGen/X86/widen_conv-4.ll b/llvm/test/CodeGen/X86/widen_conv-4.ll
index eabb4a487a78..4fa3bd522111 100644
--- a/llvm/test/CodeGen/X86/widen_conv-4.ll
+++ b/llvm/test/CodeGen/X86/widen_conv-4.ll
@@ -19,7 +19,7 @@ define void @convert_v7i16_v7f32(<7 x float>* %dst.addr, <7 x i16> %src) nounwin
 ; X86-SSE2-NEXT: movups %xmm0, (%eax)
 ; X86-SSE2-NEXT: movss %xmm2, 16(%eax)
 ; X86-SSE2-NEXT: movaps %xmm2, %xmm0
-; X86-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm2[1],xmm0[1]
+; X86-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; X86-SSE2-NEXT: movss %xmm0, 24(%eax)
 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
 ; X86-SSE2-NEXT: movss %xmm2, 20(%eax)
@@ -100,7 +100,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
 ; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
 ; X86-SSE2-NEXT: movss %xmm0, (%eax)
 ; X86-SSE2-NEXT: movaps %xmm0, %xmm1
-; X86-SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; X86-SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1]
 ; X86-SSE2-NEXT: movss %xmm1, 8(%eax)
 ; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
 ; X86-SSE2-NEXT: movss %xmm0, 4(%eax)
diff --git a/llvm/test/CodeGen/X86/win64_frame.ll b/llvm/test/CodeGen/X86/win64_frame.ll
index e8733472df0b..5690db023c5b 100644
--- a/llvm/test/CodeGen/X86/win64_frame.ll
+++ b/llvm/test/CodeGen/X86/win64_frame.ll
@@ -238,7 +238,7 @@ define i64 @f10(i64* %foo, i64 %bar, i64 %baz) {
 ; PUSHF-NEXT: .seh_setframe 5, 32
 ; PUSHF-NEXT: .seh_endprologue
 ; PUSHF-NEXT: movq %rdx, %rsi
-; PUSHF-NEXT: movq %rdx, %rax
+; PUSHF-NEXT: movq %rsi, %rax
 ; PUSHF-NEXT: lock cmpxchgq %r8, (%rcx)
 ; PUSHF-NEXT: pushfq
 ; PUSHF-NEXT: popq %rdi
@@ -269,7 +269,7 @@ define i64 @f10(i64* %foo, i64 %bar, i64 %baz) {
 ; SAHF-NEXT: .seh_setframe 5, 32
 ; SAHF-NEXT: .seh_endprologue
 ; SAHF-NEXT: movq %rdx, %rsi
-; SAHF-NEXT: movq %rdx, %rax
+; SAHF-NEXT: movq %rsi, %rax
 ; SAHF-NEXT: lock cmpxchgq %r8, (%rcx)
 ; SAHF-NEXT: seto %al
 ; SAHF-NEXT: lahf
diff --git a/llvm/test/CodeGen/X86/x86-interleaved-access.ll b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
index bc48ec05f411..9156c95c1f62 100644
--- a/llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ b/llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -1757,7 +1757,7 @@ define void @interleaved_store_vf64_i8_stride4(<64 x i8> %a, <64 x i8> %b, <64 x
 ; AVX1-NEXT: vmovups %ymm1, -{{[0-9]+}}(%rsp) # 32-byte Spill
 ; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm4 = xmm9[0],xmm12[0],xmm9[1],xmm12[1],xmm9[2],xmm12[2],xmm9[3],xmm12[3]
 ; AVX1-NEXT: vmovdqa %xmm8, %xmm2
-; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm8[0],xmm14[0],xmm8[1],xmm14[1],xmm8[2],xmm14[2],xmm8[3],xmm14[3]
+; AVX1-NEXT: vpunpcklwd {{.*#+}} xmm8 = xmm2[0],xmm14[0],xmm2[1],xmm14[1],xmm2[2],xmm14[2],xmm2[3],xmm14[3]
 ; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm8, %ymm13
 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm15 = xmm15[4],xmm5[4],xmm15[5],xmm5[5],xmm15[6],xmm5[6],xmm15[7],xmm5[7]
 ; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm10 = xmm10[4],xmm0[4],xmm10[5],xmm0[5],xmm10[6],xmm0[6],xmm10[7],xmm0[7]
diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
index 579f847914a4..2899e38b71cd 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrap-unwind.ll
@@ -23,7 +23,7 @@ target triple = "x86_64-apple-macosx"
 ; Compare the arguments and jump to exit.
 ; After the prologue is set.
 ; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]
-; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]
 ; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
 ;
 ; Store %a in the alloca.
@@ -69,7 +69,7 @@ attributes #0 = { "no-frame-pointer-elim"="false" }
 ; Compare the arguments and jump to exit.
 ; After the prologue is set.
 ; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]
-; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]
 ; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
 ;
 ; Prologue code.
@@ -115,7 +115,7 @@ attributes #1 = { "no-frame-pointer-elim"="true" }
 ; Compare the arguments and jump to exit.
 ; After the prologue is set.
 ; CHECK: movl %edi, [[ARG0CPY:%e[a-z]+]]
-; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: cmpl %esi, [[ARG0CPY]]
 ; CHECK-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
 ;
 ; Prologue code.
diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
index 65a5c78a5dad..a23cd0568fd1 100644
--- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -17,7 +17,7 @@ target triple = "x86_64-apple-macosx"
 ; Compare the arguments and jump to exit.
 ; No prologue needed.
 ; ENABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
-; ENABLE-NEXT: cmpl %esi, %edi
+; ENABLE-NEXT: cmpl %esi, [[ARG0CPY]]
 ; ENABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
 ;
 ; Prologue code.
@@ -27,7 +27,7 @@ target triple = "x86_64-apple-macosx"
 ; Compare the arguments and jump to exit.
 ; After the prologue is set.
 ; DISABLE: movl %edi, [[ARG0CPY:%e[a-z]+]]
-; DISABLE-NEXT: cmpl %esi, %edi
+; DISABLE-NEXT: cmpl %esi, [[ARG0CPY]]
 ; DISABLE-NEXT: jge [[EXIT_LABEL:LBB[0-9_]+]]
 ;
 ; Store %a in the alloca.
diff --git a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
index e96e31f5291a..3d16a28c1068 100644
--- a/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
+++ b/llvm/test/DebugInfo/COFF/fpo-shrink-wrap.ll
@@ -15,7 +15,7 @@
 ; ASM: .cv_fpo_proc @shrink_wrap_basic@16 8
 ; ASM: .cv_loc 0 1 3 9 # t.c:3:9
 ; ASM: movl %ecx, %eax
-; ASM: cmpl %edx, %ecx
+; ASM: cmpl %edx, %eax
 ; ASM: jl [[EPILOGUE:LBB0_[0-9]+]]
 
 ; ASM: pushl %ebx
diff --git a/llvm/test/DebugInfo/X86/spill-nospill.ll b/llvm/test/DebugInfo/X86/spill-nospill.ll
index 6a7aca52733d..4ba8d4adbaae 100644
--- a/llvm/test/DebugInfo/X86/spill-nospill.ll
+++ b/llvm/test/DebugInfo/X86/spill-nospill.ll
@@ -30,7 +30,7 @@
 ; CHECK: callq g
 ; CHECK: movl %eax, %[[CSR:[^ ]*]]
 ; CHECK: #DEBUG_VALUE: f:y <- $esi
-; CHECK: movl %eax, %ecx
+; CHECK: movl %[[CSR]], %ecx
 ; CHECK: callq g
 ; CHECK: movl %[[CSR]], %ecx
 ; CHECK: callq g