From edb38a94f83bf9bf6e6b1c871e81082db43ddef0 Mon Sep 17 00:00:00 2001 From: Michael Kuperstein Date: Thu, 7 Jul 2016 16:55:35 +0000 Subject: [PATCH] Revert r274692 to check whether this is what breaks windows selfhost. llvm-svn: 274771 --- llvm/lib/Target/X86/CMakeLists.txt | 1 - llvm/lib/Target/X86/X86.h | 3 - llvm/lib/Target/X86/X86FixupSetCC.cpp | 183 ----------------- llvm/lib/Target/X86/X86TargetMachine.cpp | 3 +- .../CodeGen/X86/2008-08-17-UComiCodeGenBug.ll | 2 +- .../CodeGen/X86/2008-09-11-CoalescerBug2.ll | 2 +- .../CodeGen/X86/avx-intrinsics-fast-isel.ll | 40 ++-- llvm/test/CodeGen/X86/avx-intrinsics-x86.ll | 192 ++++++++---------- llvm/test/CodeGen/X86/avx512-cmp.ll | 4 +- llvm/test/CodeGen/X86/avx512-intrinsics.ll | 2 +- llvm/test/CodeGen/X86/avx512-mask-op.ll | 2 +- llvm/test/CodeGen/X86/bmi.ll | 4 +- llvm/test/CodeGen/X86/cmov.ll | 4 +- llvm/test/CodeGen/X86/cmp.ll | 4 +- llvm/test/CodeGen/X86/cmpxchg-i1.ll | 6 +- llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll | 4 +- llvm/test/CodeGen/X86/ctpop-combine.ll | 12 +- llvm/test/CodeGen/X86/fp128-cast.ll | 5 +- llvm/test/CodeGen/X86/fp128-compare.ll | 31 ++- llvm/test/CodeGen/X86/mcinst-lowering.ll | 15 +- llvm/test/CodeGen/X86/return-ext.ll | 10 +- llvm/test/CodeGen/X86/setcc-narrowing.ll | 4 +- llvm/test/CodeGen/X86/setcc.ll | 2 +- .../CodeGen/X86/sse-intrinsics-fast-isel.ll | 32 +-- llvm/test/CodeGen/X86/sse-intrinsics-x86.ll | 32 +-- .../CodeGen/X86/sse2-intrinsics-fast-isel.ll | 32 +-- llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll | 32 +-- .../CodeGen/X86/sse41-intrinsics-fast-isel.ll | 16 +- llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll | 8 +- llvm/test/CodeGen/X86/sse41.ll | 8 +- .../CodeGen/X86/sse42-intrinsics-fast-isel.ll | 72 +++---- llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll | 44 ++-- 32 files changed, 288 insertions(+), 523 deletions(-) delete mode 100644 llvm/lib/Target/X86/X86FixupSetCC.cpp diff --git a/llvm/lib/Target/X86/CMakeLists.txt b/llvm/lib/Target/X86/CMakeLists.txt index 
894090f78977..41c851f1007f 100644 --- a/llvm/lib/Target/X86/CMakeLists.txt +++ b/llvm/lib/Target/X86/CMakeLists.txt @@ -19,7 +19,6 @@ set(sources X86FastISel.cpp X86FixupBWInsts.cpp X86FixupLEAs.cpp - X86FixupSetCC.cpp X86FloatingPoint.cpp X86FrameLowering.cpp X86ISelDAGToDAG.cpp diff --git a/llvm/lib/Target/X86/X86.h b/llvm/lib/Target/X86/X86.h index 23d6c7120a4b..27b428e21511 100644 --- a/llvm/lib/Target/X86/X86.h +++ b/llvm/lib/Target/X86/X86.h @@ -59,9 +59,6 @@ FunctionPass *createX86FixupLEAs(); /// recalculations. FunctionPass *createX86OptimizeLEAs(); -/// Return a pass that transforms setcc + movzx pairs into xor + setcc. -FunctionPass *createX86FixupSetCC(); - /// Return a pass that expands WinAlloca pseudo-instructions. FunctionPass *createX86WinAllocaExpander(); diff --git a/llvm/lib/Target/X86/X86FixupSetCC.cpp b/llvm/lib/Target/X86/X86FixupSetCC.cpp deleted file mode 100644 index fd7df70ec963..000000000000 --- a/llvm/lib/Target/X86/X86FixupSetCC.cpp +++ /dev/null @@ -1,183 +0,0 @@ -//===---- X86FixupSetCC.cpp - optimize usage of LEA instructions ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a pass that fixes zero-extension of setcc patterns. -// X86 setcc instructions are modeled to have no input arguments, and a single -// GR8 output argument. This is consistent with other similar instructions -// (e.g. movb), but means it is impossible to directly generate a setcc into -// the lower GR8 of a specified GR32. -// This means that ISel must select (zext (setcc)) into something like -// seta %al; movzbl %al, %eax. -// Unfortunately, this can cause a stall due to the partial register write -// performed by the setcc. Instead, we can use: -// xor %eax, %eax; seta %al -// This both avoids the stall, and encodes shorter. 
-//===----------------------------------------------------------------------===// - -#include "X86.h" -#include "X86InstrInfo.h" -#include "X86Subtarget.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" - -using namespace llvm; - -#define DEBUG_TYPE "x86-fixup-setcc" - -STATISTIC(NumSubstZexts, "Number of setcc + zext pairs substituted"); - -namespace { -class X86FixupSetCCPass : public MachineFunctionPass { -public: - X86FixupSetCCPass() : MachineFunctionPass(ID) {} - - const char *getPassName() const override { return "X86 Fixup SetCC"; } - - bool runOnMachineFunction(MachineFunction &MF) override; - -private: - // Find the preceding instruction that imp-defs eflags. - MachineInstr *findFlagsImpDef(MachineBasicBlock *MBB, - MachineBasicBlock::reverse_iterator MI); - - // Return true if MI imp-uses eflags. - bool impUsesFlags(MachineInstr *MI); - - // Return true if this is the opcode of a SetCC instruction with a register - // output. - bool isSetCCr(unsigned Opode); - - MachineRegisterInfo *MRI; - const X86InstrInfo *TII; - - enum { SearchBound = 16 }; - - static char ID; -}; - -char X86FixupSetCCPass::ID = 0; -} - -FunctionPass *llvm::createX86FixupSetCC() { return new X86FixupSetCCPass(); } - -bool X86FixupSetCCPass::isSetCCr(unsigned Opcode) { - switch (Opcode) { - default: - return false; - case X86::SETOr: - case X86::SETNOr: - case X86::SETBr: - case X86::SETAEr: - case X86::SETEr: - case X86::SETNEr: - case X86::SETBEr: - case X86::SETAr: - case X86::SETSr: - case X86::SETNSr: - case X86::SETPr: - case X86::SETNPr: - case X86::SETLr: - case X86::SETGEr: - case X86::SETLEr: - case X86::SETGr: - return true; - } -} - -// We expect the instruction *immediately* before the setcc to imp-def -// EFLAGS (because of scheduling glue). 
To make this less brittle w.r.t -// scheduling, look backwards until we hit the beginning of the -// basic-block, or a small bound (to avoid quadratic behavior). -MachineInstr * -X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB, - MachineBasicBlock::reverse_iterator MI) { - auto MBBStart = MBB->instr_rend(); - for (int i = 0; (i < SearchBound) && (MI != MBBStart); ++i, ++MI) - for (auto &Op : MI->implicit_operands()) - if ((Op.getReg() == X86::EFLAGS) && (Op.isDef())) - return &*MI; - - return nullptr; -} - -bool X86FixupSetCCPass::impUsesFlags(MachineInstr *MI) { - for (auto &Op : MI->implicit_operands()) - if ((Op.getReg() == X86::EFLAGS) && (Op.isUse())) - return true; - - return false; -} - -bool X86FixupSetCCPass::runOnMachineFunction(MachineFunction &MF) { - bool Changed = false; - MRI = &MF.getRegInfo(); - TII = MF.getSubtarget().getInstrInfo(); - - SmallVector ToErase; - - for (auto &MBB : MF) { - for (auto &MI : MBB) { - // Find a setcc that is used by a zext. - // This doesn't have to be the only use, the transformation is safe - // regardless. - if (!isSetCCr(MI.getOpcode())) - continue; - - MachineInstr *ZExt = nullptr; - for (auto &Use : MRI->use_instructions(MI.getOperand(0).getReg())) - if (Use.getOpcode() == X86::MOVZX32rr8) - ZExt = &Use; - - if (!ZExt) - continue; - - // Find the preceding instruction that imp-defs eflags. - MachineInstr *FlagsDefMI = findFlagsImpDef( - MI.getParent(), MachineBasicBlock::reverse_iterator(&MI)); - if (!FlagsDefMI) - continue; - - // We'd like to put something that clobbers eflags directly before - // FlagsDefMI. This can't hurt anything after FlagsDefMI, because - // it, itself, by definition, clobbers eflags. But it may happen that - // FlagsDefMI also *uses* eflags, in which case the transformation is - // invalid. 
- if (impUsesFlags(FlagsDefMI)) - continue; - - ++NumSubstZexts; - Changed = true; - - auto *RC = MRI->getRegClass(ZExt->getOperand(0).getReg()); - unsigned ZeroReg = MRI->createVirtualRegister(RC); - unsigned InsertReg = MRI->createVirtualRegister(RC); - - // Initialize a register with 0. This must go before the eflags def - BuildMI(MBB, FlagsDefMI, MI.getDebugLoc(), TII->get(X86::MOV32r0), - ZeroReg); - - // X86 setcc only takes an output GR8, so fake a GR32 input by inserting - // the setcc result into the low byte of the zeroed register. - BuildMI(*ZExt->getParent(), ZExt, ZExt->getDebugLoc(), - TII->get(X86::INSERT_SUBREG), InsertReg) - .addReg(ZeroReg) - .addReg(MI.getOperand(0).getReg()) - .addImm(X86::sub_8bit); - MRI->replaceRegWith(ZExt->getOperand(0).getReg(), InsertReg); - ToErase.push_back(ZExt); - } - } - - for (auto &I : ToErase) - I->eraseFromParent(); - - return Changed; -} diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index f9d2d0a3004f..8dd4c6614951 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -285,6 +285,7 @@ bool X86PassConfig::addInstSelector() { addPass(createCleanupLocalDynamicTLSPass()); addPass(createX86GlobalBaseRegPass()); + return false; } @@ -304,8 +305,6 @@ bool X86PassConfig::addPreISel() { } void X86PassConfig::addPreRegAlloc() { - addPass(createX86FixupSetCC()); - if (getOptLevel() != CodeGenOpt::None) addPass(createX86OptimizeLEAs()); diff --git a/llvm/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll b/llvm/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll index 907f4cc4ca3f..32f6ca0ce086 100644 --- a/llvm/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll +++ b/llvm/test/CodeGen/X86/2008-08-17-UComiCodeGenBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep xorl +; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movzbl define i32 @foo(<4 x float> %a, <4 x float> %b) nounwind { entry: diff --git 
a/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll b/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll index a9875521fb18..9646650ae15c 100644 --- a/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll +++ b/llvm/test/CodeGen/X86/2008-09-11-CoalescerBug2.ll @@ -10,9 +10,9 @@ entry: ; SOURCE-SCHED: subl ; SOURCE-SCHED: movl ; SOURCE-SCHED: sarl -; SOURCE-SCHED: xorl ; SOURCE-SCHED: cmpl ; SOURCE-SCHED: setg +; SOURCE-SCHED: movzbl ; SOURCE-SCHED: movb ; SOURCE-SCHED: xorl ; SOURCE-SCHED: subl diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll index 728fa1ce87ca..c3fb355d5087 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -3384,16 +3384,16 @@ declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone define i32 @test_mm_testnzc_pd(<2 x double> %a0, <2 x double> %a1) nounwind { ; X32-LABEL: test_mm_testnzc_pd: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestpd %xmm1, %xmm0 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_testnzc_pd: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestpd %xmm1, %xmm0 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ret i32 %res @@ -3403,17 +3403,17 @@ declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readn define i32 @test_mm256_testnzc_pd(<4 x double> %a0, <4 x double> %a1) nounwind { ; X32-LABEL: test_mm256_testnzc_pd: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestpd %ymm1, %ymm0 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_testnzc_pd: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestpd %ymm1, %ymm0 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: vzeroupper ; X64-NEXT: retq %res = call 
i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) @@ -3424,16 +3424,16 @@ declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind r define i32 @test_mm_testnzc_ps(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-LABEL: test_mm_testnzc_ps: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestps %xmm1, %xmm0 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_testnzc_ps: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestps %xmm1, %xmm0 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ret i32 %res @@ -3443,17 +3443,17 @@ declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnon define i32 @test_mm256_testnzc_ps(<8 x float> %a0, <8 x float> %a1) nounwind { ; X32-LABEL: test_mm256_testnzc_ps: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestps %ymm1, %ymm0 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_testnzc_ps: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestps %ymm1, %ymm0 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: vzeroupper ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) @@ -3464,17 +3464,17 @@ declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind rea define i32 @test_mm256_testnzc_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind { ; X32-LABEL: test_mm256_testnzc_si256: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vptest %ymm1, %ymm0 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_testnzc_si256: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vptest %ymm1, %ymm0 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: vzeroupper ; X64-NEXT: retq %res = call i32 
@llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) @@ -3485,16 +3485,16 @@ declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone define i32 @test_mm_testz_pd(<2 x double> %a0, <2 x double> %a1) nounwind { ; X32-LABEL: test_mm_testz_pd: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestpd %xmm1, %xmm0 ; X32-NEXT: sete %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_testz_pd: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestpd %xmm1, %xmm0 ; X64-NEXT: sete %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ret i32 %res @@ -3504,17 +3504,17 @@ declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnon define i32 @test_mm256_testz_pd(<4 x double> %a0, <4 x double> %a1) nounwind { ; X32-LABEL: test_mm256_testz_pd: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestpd %ymm1, %ymm0 ; X32-NEXT: sete %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_testz_pd: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestpd %ymm1, %ymm0 ; X64-NEXT: sete %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: vzeroupper ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) @@ -3525,16 +3525,16 @@ declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind rea define i32 @test_mm_testz_ps(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-LABEL: test_mm_testz_ps: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestps %xmm1, %xmm0 ; X32-NEXT: sete %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_testz_ps: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestps %xmm1, %xmm0 ; X64-NEXT: sete %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ret i32 %res @@ -3544,17 +3544,17 @@ 
declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind readnone define i32 @test_mm256_testz_ps(<8 x float> %a0, <8 x float> %a1) nounwind { ; X32-LABEL: test_mm256_testz_ps: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestps %ymm1, %ymm0 ; X32-NEXT: sete %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_testz_ps: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestps %ymm1, %ymm0 ; X64-NEXT: sete %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: vzeroupper ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) @@ -3565,17 +3565,17 @@ declare i32 @llvm.x86.avx.vtestz.ps.256(<8 x float>, <8 x float>) nounwind readn define i32 @test_mm256_testz_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind { ; X32-LABEL: test_mm256_testz_si256: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vptest %ymm1, %ymm0 ; X32-NEXT: sete %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_testz_si256: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vptest %ymm1, %ymm0 ; X64-NEXT: sete %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: vzeroupper ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll index a7421be86e90..5e83cd0b6036 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-x86.ll @@ -173,16 +173,16 @@ declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) { ; AVX-LABEL: test_x86_sse2_comige_sd: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vcomisd %xmm1, %xmm0 ; AVX-NEXT: setae %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse2_comige_sd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl 
%eax, %eax ; AVX512VL-NEXT: vcomisd %xmm1, %xmm0 ; AVX512VL-NEXT: setae %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -193,16 +193,16 @@ declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) { ; AVX-LABEL: test_x86_sse2_comigt_sd: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vcomisd %xmm1, %xmm0 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse2_comigt_sd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vcomisd %xmm1, %xmm0 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -213,16 +213,16 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) { ; AVX-LABEL: test_x86_sse2_comile_sd: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vcomisd %xmm0, %xmm1 ; AVX-NEXT: setae %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse2_comile_sd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vcomisd %xmm0, %xmm1 ; AVX512VL-NEXT: setae %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -233,16 +233,16 @@ declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) { ; AVX-LABEL: test_x86_sse2_comilt_sd: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vcomisd %xmm0, %xmm1 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: 
test_x86_sse2_comilt_sd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vcomisd %xmm0, %xmm1 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -1264,16 +1264,16 @@ declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) { ; AVX-LABEL: test_x86_sse2_ucomige_sd: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vucomisd %xmm1, %xmm0 ; AVX-NEXT: setae %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse2_ucomige_sd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vucomisd %xmm1, %xmm0 ; AVX512VL-NEXT: setae %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -1284,16 +1284,16 @@ declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) { ; AVX-LABEL: test_x86_sse2_ucomigt_sd: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vucomisd %xmm1, %xmm0 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse2_ucomigt_sd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vucomisd %xmm1, %xmm0 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -1304,16 +1304,16 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) { ; AVX-LABEL: test_x86_sse2_ucomile_sd: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vucomisd %xmm0, %xmm1 ; AVX-NEXT: 
setae %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse2_ucomile_sd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vucomisd %xmm0, %xmm1 ; AVX512VL-NEXT: setae %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -1324,16 +1324,16 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { ; AVX-LABEL: test_x86_sse2_ucomilt_sd: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vucomisd %xmm0, %xmm1 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse2_ucomilt_sd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vucomisd %xmm0, %xmm1 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -1791,16 +1791,16 @@ declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) { ; AVX-LABEL: test_x86_sse41_ptestnzc: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vptest %xmm1, %xmm0 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse41_ptestnzc: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vptest %xmm1, %xmm0 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] ret i32 %res @@ -1811,16 +1811,16 @@ declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) { ; AVX-LABEL: test_x86_sse41_ptestz: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; 
AVX-NEXT: vptest %xmm1, %xmm0 ; AVX-NEXT: sete %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse41_ptestz: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vptest %xmm1, %xmm0 ; AVX512VL-NEXT: sete %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] ret i32 %res @@ -1943,29 +1943,23 @@ define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) { } -define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) nounwind { +define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { ; AVX-LABEL: test_x86_sse42_pcmpestria128: ; AVX: ## BB#0: -; AVX-NEXT: pushl %ebx ; AVX-NEXT: movl $7, %eax ; AVX-NEXT: movl $7, %edx -; AVX-NEXT: xorl %ebx, %ebx ; AVX-NEXT: vpcmpestri $7, %xmm1, %xmm0 -; AVX-NEXT: seta %bl -; AVX-NEXT: movl %ebx, %eax -; AVX-NEXT: popl %ebx +; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse42_pcmpestria128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: pushl %ebx ; AVX512VL-NEXT: movl $7, %eax ; AVX512VL-NEXT: movl $7, %edx -; AVX512VL-NEXT: xorl %ebx, %ebx ; AVX512VL-NEXT: vpcmpestri $7, %xmm1, %xmm0 -; AVX512VL-NEXT: seta %bl -; AVX512VL-NEXT: movl %ebx, %eax -; AVX512VL-NEXT: popl %ebx +; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; [#uses=1] ret i32 %res @@ -1997,29 +1991,23 @@ define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone -define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) nounwind { +define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { ; AVX-LABEL: test_x86_sse42_pcmpestrio128: ; AVX: ## BB#0: -; AVX-NEXT: pushl %ebx ; AVX-NEXT: 
movl $7, %eax ; AVX-NEXT: movl $7, %edx -; AVX-NEXT: xorl %ebx, %ebx ; AVX-NEXT: vpcmpestri $7, %xmm1, %xmm0 -; AVX-NEXT: seto %bl -; AVX-NEXT: movl %ebx, %eax -; AVX-NEXT: popl %ebx +; AVX-NEXT: seto %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse42_pcmpestrio128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: pushl %ebx ; AVX512VL-NEXT: movl $7, %eax ; AVX512VL-NEXT: movl $7, %edx -; AVX512VL-NEXT: xorl %ebx, %ebx ; AVX512VL-NEXT: vpcmpestri $7, %xmm1, %xmm0 -; AVX512VL-NEXT: seto %bl -; AVX512VL-NEXT: movl %ebx, %eax -; AVX512VL-NEXT: popl %ebx +; AVX512VL-NEXT: seto %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; [#uses=1] ret i32 %res @@ -2027,29 +2015,23 @@ define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) nounwind declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone -define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) nounwind { +define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { ; AVX-LABEL: test_x86_sse42_pcmpestris128: ; AVX: ## BB#0: -; AVX-NEXT: pushl %ebx ; AVX-NEXT: movl $7, %eax ; AVX-NEXT: movl $7, %edx -; AVX-NEXT: xorl %ebx, %ebx ; AVX-NEXT: vpcmpestri $7, %xmm1, %xmm0 -; AVX-NEXT: sets %bl -; AVX-NEXT: movl %ebx, %eax -; AVX-NEXT: popl %ebx +; AVX-NEXT: sets %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse42_pcmpestris128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: pushl %ebx ; AVX512VL-NEXT: movl $7, %eax ; AVX512VL-NEXT: movl $7, %edx -; AVX512VL-NEXT: xorl %ebx, %ebx ; AVX512VL-NEXT: vpcmpestri $7, %xmm1, %xmm0 -; AVX512VL-NEXT: sets %bl -; AVX512VL-NEXT: movl %ebx, %eax -; AVX512VL-NEXT: popl %ebx +; AVX512VL-NEXT: sets %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 
7) ; [#uses=1] ret i32 %res @@ -2057,29 +2039,23 @@ define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) nounwind declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone -define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) nounwind { +define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { ; AVX-LABEL: test_x86_sse42_pcmpestriz128: ; AVX: ## BB#0: -; AVX-NEXT: pushl %ebx ; AVX-NEXT: movl $7, %eax ; AVX-NEXT: movl $7, %edx -; AVX-NEXT: xorl %ebx, %ebx ; AVX-NEXT: vpcmpestri $7, %xmm1, %xmm0 -; AVX-NEXT: sete %bl -; AVX-NEXT: movl %ebx, %eax -; AVX-NEXT: popl %ebx +; AVX-NEXT: sete %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse42_pcmpestriz128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: pushl %ebx ; AVX512VL-NEXT: movl $7, %eax ; AVX512VL-NEXT: movl $7, %edx -; AVX512VL-NEXT: xorl %ebx, %ebx ; AVX512VL-NEXT: vpcmpestri $7, %xmm1, %xmm0 -; AVX512VL-NEXT: sete %bl -; AVX512VL-NEXT: movl %ebx, %eax -; AVX512VL-NEXT: popl %ebx +; AVX512VL-NEXT: sete %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; [#uses=1] ret i32 %res @@ -2175,16 +2151,16 @@ define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) { define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { ; AVX-LABEL: test_x86_sse42_pcmpistria128: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vpcmpistri $7, %xmm1, %xmm0 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse42_pcmpistria128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vpcmpistri $7, %xmm1, %xmm0 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; [#uses=1] ret i32 %res @@ -2215,16 +2191,16 
@@ declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { ; AVX-LABEL: test_x86_sse42_pcmpistrio128: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vpcmpistri $7, %xmm1, %xmm0 ; AVX-NEXT: seto %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse42_pcmpistrio128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vpcmpistri $7, %xmm1, %xmm0 ; AVX512VL-NEXT: seto %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; [#uses=1] ret i32 %res @@ -2235,16 +2211,16 @@ declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { ; AVX-LABEL: test_x86_sse42_pcmpistris128: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vpcmpistri $7, %xmm1, %xmm0 ; AVX-NEXT: sets %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse42_pcmpistris128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vpcmpistri $7, %xmm1, %xmm0 ; AVX512VL-NEXT: sets %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; [#uses=1] ret i32 %res @@ -2255,16 +2231,16 @@ declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { ; AVX-LABEL: test_x86_sse42_pcmpistriz128: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vpcmpistri $7, %xmm1, %xmm0 ; AVX-NEXT: sete %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse42_pcmpistriz128: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vpcmpistri $7, %xmm1, %xmm0 ; AVX512VL-NEXT: sete %al +; AVX512VL-NEXT: movzbl %al, %eax ; 
AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; [#uses=1] ret i32 %res @@ -2381,16 +2357,16 @@ declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) { ; AVX-LABEL: test_x86_sse_comige_ss: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vcomiss %xmm1, %xmm0 ; AVX-NEXT: setae %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse_comige_ss: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vcomiss %xmm1, %xmm0 ; AVX512VL-NEXT: setae %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -2401,16 +2377,16 @@ declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) { ; AVX-LABEL: test_x86_sse_comigt_ss: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vcomiss %xmm1, %xmm0 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse_comigt_ss: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vcomiss %xmm1, %xmm0 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -2421,16 +2397,16 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { ; AVX-LABEL: test_x86_sse_comile_ss: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: setae %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse_comile_ss: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vcomiss %xmm0, %xmm1 ; AVX512VL-NEXT: 
setae %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -2441,16 +2417,16 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { ; AVX-LABEL: test_x86_sse_comilt_ss: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vcomiss %xmm0, %xmm1 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse_comilt_ss: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vcomiss %xmm0, %xmm1 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -2821,16 +2797,16 @@ declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) { ; AVX-LABEL: test_x86_sse_ucomige_ss: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vucomiss %xmm1, %xmm0 ; AVX-NEXT: setae %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse_ucomige_ss: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vucomiss %xmm1, %xmm0 ; AVX512VL-NEXT: setae %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -2841,16 +2817,16 @@ declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) { ; AVX-LABEL: test_x86_sse_ucomigt_ss: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vucomiss %xmm1, %xmm0 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse_ucomigt_ss: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; 
AVX512VL-NEXT: vucomiss %xmm1, %xmm0 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -2861,16 +2837,16 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) { ; AVX-LABEL: test_x86_sse_ucomile_ss: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: setae %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse_ucomile_ss: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vucomiss %xmm0, %xmm1 ; AVX512VL-NEXT: setae %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -2881,16 +2857,16 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { ; AVX-LABEL: test_x86_sse_ucomilt_ss: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vucomiss %xmm0, %xmm1 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_sse_ucomilt_ss: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vucomiss %xmm0, %xmm1 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -3802,17 +3778,17 @@ declare i32 @llvm.x86.avx.ptestc.256(<4 x i64>, <4 x i64>) nounwind readnone define i32 @test_x86_avx_ptestnzc_256(<4 x i64> %a0, <4 x i64> %a1) { ; AVX-LABEL: test_x86_avx_ptestnzc_256: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vptest %ymm1, %ymm0 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retl ; ; AVX512VL-LABEL: 
test_x86_avx_ptestnzc_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vptest %ymm1, %ymm0 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1] ret i32 %res @@ -3823,17 +3799,17 @@ declare i32 @llvm.x86.avx.ptestnzc.256(<4 x i64>, <4 x i64>) nounwind readnone define i32 @test_x86_avx_ptestz_256(<4 x i64> %a0, <4 x i64> %a1) { ; AVX-LABEL: test_x86_avx_ptestz_256: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vptest %ymm1, %ymm0 ; AVX-NEXT: sete %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_avx_ptestz_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vptest %ymm1, %ymm0 ; AVX512VL-NEXT: sete %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.avx.ptestz.256(<4 x i64> %a0, <4 x i64> %a1) ; [#uses=1] ret i32 %res @@ -4199,16 +4175,16 @@ declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readn define i32 @test_x86_avx_vtestnzc_pd(<2 x double> %a0, <2 x double> %a1) { ; AVX-LABEL: test_x86_avx_vtestnzc_pd: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vtestpd %xmm1, %xmm0 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_avx_vtestnzc_pd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vtestpd %xmm1, %xmm0 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -4219,17 +4195,17 @@ declare i32 @llvm.x86.avx.vtestnzc.pd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_avx_vtestnzc_pd_256(<4 x double> %a0, <4 x double> %a1) { ; AVX-LABEL: test_x86_avx_vtestnzc_pd_256: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vtestpd %ymm1, %ymm0 ; AVX-NEXT: 
seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_avx_vtestnzc_pd_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vtestpd %ymm1, %ymm0 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %a0, <4 x double> %a1) ; [#uses=1] ret i32 %res @@ -4240,16 +4216,16 @@ declare i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double>, <4 x double>) nounwind r define i32 @test_x86_avx_vtestnzc_ps(<4 x float> %a0, <4 x float> %a1) { ; AVX-LABEL: test_x86_avx_vtestnzc_ps: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vtestps %xmm1, %xmm0 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_avx_vtestnzc_ps: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vtestps %xmm1, %xmm0 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -4260,17 +4236,17 @@ declare i32 @llvm.x86.avx.vtestnzc.ps(<4 x float>, <4 x float>) nounwind readnon define i32 @test_x86_avx_vtestnzc_ps_256(<8 x float> %a0, <8 x float> %a1) { ; AVX-LABEL: test_x86_avx_vtestnzc_ps_256: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vtestps %ymm1, %ymm0 ; AVX-NEXT: seta %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_avx_vtestnzc_ps_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vtestps %ymm1, %ymm0 ; AVX512VL-NEXT: seta %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %a0, <8 x float> %a1) ; [#uses=1] ret i32 %res @@ -4281,16 +4257,16 @@ declare i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float>, <8 x float>) nounwind rea define i32 @test_x86_avx_vtestz_pd(<2 x double> %a0, <2 x double> %a1) 
{ ; AVX-LABEL: test_x86_avx_vtestz_pd: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vtestpd %xmm1, %xmm0 ; AVX-NEXT: sete %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_avx_vtestz_pd: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vtestpd %xmm1, %xmm0 ; AVX512VL-NEXT: sete %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.avx.vtestz.pd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -4301,17 +4277,17 @@ declare i32 @llvm.x86.avx.vtestz.pd(<2 x double>, <2 x double>) nounwind readnon define i32 @test_x86_avx_vtestz_pd_256(<4 x double> %a0, <4 x double> %a1) { ; AVX-LABEL: test_x86_avx_vtestz_pd_256: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vtestpd %ymm1, %ymm0 ; AVX-NEXT: sete %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_avx_vtestz_pd_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vtestpd %ymm1, %ymm0 ; AVX512VL-NEXT: sete %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %a0, <4 x double> %a1) ; [#uses=1] ret i32 %res @@ -4322,16 +4298,16 @@ declare i32 @llvm.x86.avx.vtestz.pd.256(<4 x double>, <4 x double>) nounwind rea define i32 @test_x86_avx_vtestz_ps(<4 x float> %a0, <4 x float> %a1) { ; AVX-LABEL: test_x86_avx_vtestz_ps: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vtestps %xmm1, %xmm0 ; AVX-NEXT: sete %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_avx_vtestz_ps: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vtestps %xmm1, %xmm0 ; AVX512VL-NEXT: sete %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.avx.vtestz.ps(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -4342,17 +4318,17 @@ declare i32 @llvm.x86.avx.vtestz.ps(<4 x float>, <4 x float>) nounwind 
readnone define i32 @test_x86_avx_vtestz_ps_256(<8 x float> %a0, <8 x float> %a1) { ; AVX-LABEL: test_x86_avx_vtestz_ps_256: ; AVX: ## BB#0: -; AVX-NEXT: xorl %eax, %eax ; AVX-NEXT: vtestps %ymm1, %ymm0 ; AVX-NEXT: sete %al +; AVX-NEXT: movzbl %al, %eax ; AVX-NEXT: vzeroupper ; AVX-NEXT: retl ; ; AVX512VL-LABEL: test_x86_avx_vtestz_ps_256: ; AVX512VL: ## BB#0: -; AVX512VL-NEXT: xorl %eax, %eax ; AVX512VL-NEXT: vtestps %ymm1, %ymm0 ; AVX512VL-NEXT: sete %al +; AVX512VL-NEXT: movzbl %al, %eax ; AVX512VL-NEXT: retl %res = call i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %a0, <8 x float> %a1) ; [#uses=1] ret i32 %res diff --git a/llvm/test/CodeGen/X86/avx512-cmp.ll b/llvm/test/CodeGen/X86/avx512-cmp.ll index fceb9c14b7df..52caa0ed5d60 100644 --- a/llvm/test/CodeGen/X86/avx512-cmp.ll +++ b/llvm/test/CodeGen/X86/avx512-cmp.ll @@ -94,9 +94,9 @@ return: ; preds = %if.end, %entry define i32 @test6(i32 %a, i32 %b) { ; ALL-LABEL: test6: ; ALL: ## BB#0: -; ALL-NEXT: xorl %eax, %eax ; ALL-NEXT: cmpl %esi, %edi ; ALL-NEXT: sete %al +; ALL-NEXT: movzbl %al, %eax ; ALL-NEXT: retq %cmp = icmp eq i32 %a, %b %res = zext i1 %cmp to i32 @@ -106,9 +106,9 @@ define i32 @test6(i32 %a, i32 %b) { define i32 @test7(double %x, double %y) #2 { ; ALL-LABEL: test7: ; ALL: ## BB#0: ## %entry -; ALL-NEXT: xorl %eax, %eax ; ALL-NEXT: vucomisd %xmm1, %xmm0 ; ALL-NEXT: setne %al +; ALL-NEXT: movzbl %al, %eax ; ALL-NEXT: retq entry: %0 = fcmp one double %x, %y diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 1d51b6f65491..d1961fc96e6c 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -7,9 +7,9 @@ define i32 @test_kortestz(i16 %a0, i16 %a1) { ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %esi, %k0 ; CHECK-NEXT: kmovw %edi, %k1 -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: kortestw %k0, %k1 ; CHECK-NEXT: sete %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retq %res = call i32 
@llvm.x86.avx512.kortestz.w(i16 %a0, i16 %a1) ret i32 %res diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll index 0c94a8bc1c90..939c338ac9a4 100644 --- a/llvm/test/CodeGen/X86/avx512-mask-op.ll +++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll @@ -571,9 +571,9 @@ define <64 x i8> @test17(i64 %x, i32 %y, i32 %z) { ; KNL-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z} ; KNL-NEXT: vpmovdb %zmm1, %xmm1 ; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1 -; KNL-NEXT: xorl %ecx, %ecx ; KNL-NEXT: cmpl %edx, %esi ; KNL-NEXT: setg %cl +; KNL-NEXT: movzbl %cl, %ecx ; KNL-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 ; KNL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7] ; KNL-NEXT: vpsllw $7, %ymm0, %ymm0 diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll index 10b1f51e3da9..c79a6374226d 100644 --- a/llvm/test/CodeGen/X86/bmi.ll +++ b/llvm/test/CodeGen/X86/bmi.ll @@ -201,6 +201,7 @@ define i1 @and_cmp_const_power_of_two(i32 %x, i32 %y) { ; CHECK-NEXT: btl %esi, %edi ; CHECK-NEXT: setae %al ; CHECK-NEXT: retq +; %shl = shl i32 1, %y %and = and i32 %x, %shl %cmp = icmp ne i32 %and, %shl @@ -212,11 +213,12 @@ define i32 @and_cmp_not_one_use(i32 %x) { ; CHECK-LABEL: and_cmp_not_one_use: ; CHECK: # BB#0: ; CHECK-NEXT: andl $37, %edi -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $37, %edi ; CHECK-NEXT: sete %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: addl %edi, %eax ; CHECK-NEXT: retq +; %and = and i32 %x, 37 %cmp = icmp eq i32 %and, 37 %ext = zext i1 %cmp to i32 diff --git a/llvm/test/CodeGen/X86/cmov.ll b/llvm/test/CodeGen/X86/cmov.ll index 9acc9ea4fb18..f2f36b15d0c5 100644 --- a/llvm/test/CodeGen/X86/cmov.ll +++ b/llvm/test/CodeGen/X86/cmov.ll @@ -120,8 +120,8 @@ declare i32 @printf(i8* nocapture, ...) 
nounwind define i32 @test5(i32* nocapture %P) nounwind readonly { entry: ; CHECK-LABEL: test5: -; CHECK: xorl %eax, %eax ; CHECK: setg %al +; CHECK: movzbl %al, %eax ; CHECK: orl $-2, %eax ; CHECK: ret @@ -134,8 +134,8 @@ entry: define i32 @test6(i32* nocapture %P) nounwind readonly { entry: ; CHECK-LABEL: test6: -; CHECK: xorl %eax, %eax ; CHECK: setl %al +; CHECK: movzbl %al, %eax ; CHECK: leal 4(%rax,%rax,8), %eax ; CHECK: ret %0 = load i32, i32* %P, align 4 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll index d24f27ddf22c..94f7c54c2856 100644 --- a/llvm/test/CodeGen/X86/cmp.ll +++ b/llvm/test/CodeGen/X86/cmp.ll @@ -49,9 +49,9 @@ define i64 @test3(i64 %x) nounwind { %r = zext i1 %t to i64 ret i64 %r ; CHECK-LABEL: test3: -; CHECK: xorl %eax, %eax ; CHECK: testq %rdi, %rdi ; CHECK: sete %al +; CHECK: movzbl %al, %eax ; CHECK: ret } @@ -60,9 +60,9 @@ define i64 @test4(i64 %x) nounwind { %r = zext i1 %t to i64 ret i64 %r ; CHECK-LABEL: test4: -; CHECK: xorl %eax, %eax ; CHECK: testq %rdi, %rdi ; CHECK: setle %al +; CHECK: movzbl %al, %eax ; CHECK: ret } diff --git a/llvm/test/CodeGen/X86/cmpxchg-i1.ll b/llvm/test/CodeGen/X86/cmpxchg-i1.ll index 97e4472b0890..5f5869f78bba 100644 --- a/llvm/test/CodeGen/X86/cmpxchg-i1.ll +++ b/llvm/test/CodeGen/X86/cmpxchg-i1.ll @@ -34,7 +34,7 @@ define i64 @cmpxchg_sext(i32* %addr, i32 %desired, i32 %new) { ; CHECK-LABEL: cmpxchg_sext: ; CHECK-DAG: cmpxchgl ; CHECK-NOT: cmpl -; CHECK: sete %cl +; CHECK: sete %al ; CHECK: retq %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst %success = extractvalue { i32, i1 } %pair, 1 @@ -44,10 +44,10 @@ define i64 @cmpxchg_sext(i32* %addr, i32 %desired, i32 %new) { define i32 @cmpxchg_zext(i32* %addr, i32 %desired, i32 %new) { ; CHECK-LABEL: cmpxchg_zext: -; CHECK: xorl %e[[R:[a-z]]]x ; CHECK: cmpxchgl ; CHECK-NOT: cmp -; CHECK: sete %[[R]]l +; CHECK: sete [[BYTE:%[a-z0-9]+]] +; CHECK: movzbl [[BYTE]], %eax %pair = cmpxchg i32* %addr, i32 
%desired, i32 %new seq_cst seq_cst %success = extractvalue { i32, i1 } %pair, 1 %mask = zext i1 %success to i32 diff --git a/llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll b/llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll index 1510b2a49c32..278e6a4ed75e 100644 --- a/llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll +++ b/llvm/test/CodeGen/X86/cmpxchg-i128-i1.ll @@ -44,10 +44,10 @@ define i1 @cmpxchg_arithcmp(i128* %addr, i128 %desired, i128 %new) { define i128 @cmpxchg_zext(i128* %addr, i128 %desired, i128 %new) { ; CHECK-LABEL: cmpxchg_zext: -; CHECK: xorl ; CHECK: cmpxchg16b ; CHECK-NOT: cmpq -; CHECK: sete +; CHECK: sete [[BYTE:%[a-z0-9]+]] +; CHECK: movzbl [[BYTE]], %eax %pair = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst seq_cst %success = extractvalue { i128, i1 } %pair, 1 %mask = zext i1 %success to i128 diff --git a/llvm/test/CodeGen/X86/ctpop-combine.ll b/llvm/test/CodeGen/X86/ctpop-combine.ll index 435401639f05..3da16fb0121b 100644 --- a/llvm/test/CodeGen/X86/ctpop-combine.ll +++ b/llvm/test/CodeGen/X86/ctpop-combine.ll @@ -6,10 +6,10 @@ declare i64 @llvm.ctpop.i64(i64) nounwind readnone define i32 @test1(i64 %x) nounwind readnone { ; CHECK-LABEL: test1: ; CHECK: # BB#0: -; CHECK-NEXT: leaq -1(%rdi), %rcx -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testq %rcx, %rdi +; CHECK-NEXT: leaq -1(%rdi), %rax +; CHECK-NEXT: testq %rax, %rdi ; CHECK-NEXT: setne %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retq %count = tail call i64 @llvm.ctpop.i64(i64 %x) %cast = trunc i64 %count to i32 @@ -22,10 +22,10 @@ define i32 @test1(i64 %x) nounwind readnone { define i32 @test2(i64 %x) nounwind readnone { ; CHECK-LABEL: test2: ; CHECK: # BB#0: -; CHECK-NEXT: leaq -1(%rdi), %rcx -; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: testq %rcx, %rdi +; CHECK-NEXT: leaq -1(%rdi), %rax +; CHECK-NEXT: testq %rax, %rdi ; CHECK-NEXT: sete %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retq %count = tail call i64 @llvm.ctpop.i64(i64 %x) %cmp = icmp ult i64 %count, 2 diff --git 
a/llvm/test/CodeGen/X86/fp128-cast.ll b/llvm/test/CodeGen/X86/fp128-cast.ll index 2d872498dfc7..8b6ed54cc3fc 100644 --- a/llvm/test/CodeGen/X86/fp128-cast.ll +++ b/llvm/test/CodeGen/X86/fp128-cast.ll @@ -238,7 +238,6 @@ entry: ; X64-LABEL: TestConst128: ; X64: movaps {{.*}}, %xmm1 ; X64-NEXT: callq __gttf2 -; X64-NEXT: xorl ; X64-NEXT: test ; X64: retq } @@ -278,9 +277,9 @@ entry: ; X64-NEXT: movq (%rsp), ; X64-NEXT: movq % ; X64-NEXT: shrq $32, -; X64: xorl %eax, %eax -; X64-NEXT: orl +; X64: orl ; X64-NEXT: sete %al +; X64-NEXT: movzbl %al, %eax ; X64: retq ; ; If TestBits128 fails due to any llvm or clang change, diff --git a/llvm/test/CodeGen/X86/fp128-compare.ll b/llvm/test/CodeGen/X86/fp128-compare.ll index 6ad3b74aeafa..d9a48c5c13e5 100644 --- a/llvm/test/CodeGen/X86/fp128-compare.ll +++ b/llvm/test/CodeGen/X86/fp128-compare.ll @@ -8,9 +8,8 @@ entry: ret i32 %conv ; CHECK-LABEL: TestComp128GT: ; CHECK: callq __gttf2 -; CHECK: xorl %ecx, %ecx -; CHECK: setg %cl -; CHECK: movl %ecx, %eax +; CHECK: setg %al +; CHECK: movzbl %al, %eax ; CHECK: retq } @@ -21,10 +20,9 @@ entry: ret i32 %conv ; CHECK-LABEL: TestComp128GE: ; CHECK: callq __getf2 -; CHECK: xorl %ecx, %ecx ; CHECK: testl %eax, %eax -; CHECK: setns %cl -; CHECK: movl %ecx, %eax +; CHECK: setns %al +; CHECK: movzbl %al, %eax ; CHECK: retq } @@ -50,10 +48,9 @@ entry: ret i32 %conv ; CHECK-LABEL: TestComp128LE: ; CHECK: callq __letf2 -; CHECK: xorl %ecx, %ecx -; CHECK: testl %eax, %eax -; CHECK: setle %cl -; CHECK: movl %ecx, %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK: setle %al +; CHECK: movzbl %al, %eax ; CHECK: retq } @@ -64,10 +61,9 @@ entry: ret i32 %conv ; CHECK-LABEL: TestComp128EQ: ; CHECK: callq __eqtf2 -; CHECK: xorl %ecx, %ecx -; CHECK: testl %eax, %eax -; CHECK: sete %cl -; CHECK: movl %ecx, %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK: sete %al +; CHECK: movzbl %al, %eax ; CHECK: retq } @@ -78,10 +74,9 @@ entry: ret i32 %conv ; CHECK-LABEL: TestComp128NE: ; CHECK: callq __netf2 -; CHECK: 
xorl %ecx, %ecx -; CHECK: testl %eax, %eax -; CHECK: setne %cl -; CHECK: movl %ecx, %eax +; CHECK-NEXT: testl %eax, %eax +; CHECK: setne %al +; CHECK: movzbl %al, %eax ; CHECK: retq } diff --git a/llvm/test/CodeGen/X86/mcinst-lowering.ll b/llvm/test/CodeGen/X86/mcinst-lowering.ll index 7b16d7616fe5..51b2895f1c78 100644 --- a/llvm/test/CodeGen/X86/mcinst-lowering.ll +++ b/llvm/test/CodeGen/X86/mcinst-lowering.ll @@ -3,17 +3,26 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-apple-darwin10.0.0" -declare i32 @foo(); - define i32 @f0(i32* nocapture %x) nounwind readonly ssp { entry: - %tmp1 = call i32 @foo() + %tmp1 = load i32, i32* %x ; [#uses=2] + %tobool = icmp eq i32 %tmp1, 0 ; [#uses=1] + br i1 %tobool, label %if.end, label %return + +if.end: ; preds = %entry + +; Check that we lower to the short form of cmpl, which has a fixed %eax +; register. +; ; CHECK: cmpl $16777216, %eax ; CHECK: # encoding: [0x3d,0x00,0x00,0x00,0x01] %cmp = icmp eq i32 %tmp1, 16777216 ; [#uses=1] %conv = zext i1 %cmp to i32 ; [#uses=1] ret i32 %conv + +return: ; preds = %entry + ret i32 0 } define i32 @f1() nounwind { diff --git a/llvm/test/CodeGen/X86/return-ext.ll b/llvm/test/CodeGen/X86/return-ext.ll index ef160f43b4aa..e2ad78815e2a 100644 --- a/llvm/test/CodeGen/X86/return-ext.ll +++ b/llvm/test/CodeGen/X86/return-ext.ll @@ -42,9 +42,9 @@ entry: ; Except on Darwin, for legacy reasons. ; DARWIN-LABEL: unsigned_i8: -; DARWIN: xorl -; DARWIN-NEXT: cmp +; DARWIN: cmp ; DARWIN-NEXT: sete +; DARWIN-NEXT: movzbl ; DARWIN-NEXT: ret } @@ -63,9 +63,9 @@ entry: ; Except on Darwin, for legacy reasons. ; DARWIN-LABEL: signed_i8: -; DARWIN: xorl -; DARWIN-NEXT: cmp +; DARWIN: cmp ; DARWIN-NEXT: sete +; DARWIN-NEXT: movzbl ; DARWIN-NEXT: ret } @@ -85,7 +85,7 @@ entry: ; CHECK-NEXT: addw ; CHECK-NEXT: ret -; Except on Darwin, for legacy reasons. 
+; Except on Darwin, for legacy reasons. ; DARWIN-LABEL: unsigned_i16: ; DARWIN-BWOFF: movw ; DARWIN-BWON: movzwl diff --git a/llvm/test/CodeGen/X86/setcc-narrowing.ll b/llvm/test/CodeGen/X86/setcc-narrowing.ll index a4259ddd2318..bf5b45031a24 100644 --- a/llvm/test/CodeGen/X86/setcc-narrowing.ll +++ b/llvm/test/CodeGen/X86/setcc-narrowing.ll @@ -6,9 +6,9 @@ define i32 @t1() nounwind ssp { entry: ; CHECK-LABEL: t1: -; CHECK: xorl %eax, %eax -; CHECK-NEXT: cmpl $0, _t1.global +; CHECK: cmpl $0, _t1.global ; CHECK-NEXT: setne %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: ret %0 = load i64, i64* @t1.global, align 8 %and = and i64 4294967295, %0 diff --git a/llvm/test/CodeGen/X86/setcc.ll b/llvm/test/CodeGen/X86/setcc.ll index eabcda4e075f..d5874344fd10 100644 --- a/llvm/test/CodeGen/X86/setcc.ll +++ b/llvm/test/CodeGen/X86/setcc.ll @@ -7,8 +7,8 @@ define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp { entry: ; CHECK-LABEL: t1: -; CHECK: xorl %eax, %eax ; CHECK: seta %al +; CHECK: movzbl %al, %eax ; CHECK: shll $5, %eax %0 = icmp ugt i16 %x, 26 ; [#uses=1] %iftmp.1.0 = select i1 %0, i16 32, i16 0 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll index 090ddfdfa93a..fe63b82f814a 100644 --- a/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll @@ -593,16 +593,16 @@ declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_mm_comige_ss(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-LABEL: test_mm_comige_ss: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: comiss %xmm1, %xmm0 ; X32-NEXT: setae %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_comige_ss: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: comiss %xmm1, %xmm0 ; X64-NEXT: setae %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x
float> %a1) ret i32 %res @@ -612,16 +612,16 @@ declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_mm_comigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-LABEL: test_mm_comigt_ss: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: comiss %xmm1, %xmm0 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_comigt_ss: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: comiss %xmm1, %xmm0 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ret i32 %res @@ -631,16 +631,16 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_mm_comile_ss(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-LABEL: test_mm_comile_ss: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: comiss %xmm0, %xmm1 ; X32-NEXT: setae %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_comile_ss: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: comiss %xmm0, %xmm1 ; X64-NEXT: setae %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ret i32 %res @@ -650,16 +650,16 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_mm_comilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-LABEL: test_mm_comilt_ss: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: comiss %xmm0, %xmm1 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_comilt_ss: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: comiss %xmm0, %xmm1 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ret i32 %res @@ -2094,16 +2094,16 @@ declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone define i32 
@test_mm_ucomige_ss(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-LABEL: test_mm_ucomige_ss: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ucomiss %xmm1, %xmm0 ; X32-NEXT: setae %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_ucomige_ss: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomiss %xmm1, %xmm0 ; X64-NEXT: setae %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ret i32 %res @@ -2113,16 +2113,16 @@ declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_mm_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-LABEL: test_mm_ucomigt_ss: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ucomiss %xmm1, %xmm0 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_ucomigt_ss: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomiss %xmm1, %xmm0 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ret i32 %res @@ -2132,16 +2132,16 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_mm_ucomile_ss(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-LABEL: test_mm_ucomile_ss: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ucomiss %xmm0, %xmm1 ; X32-NEXT: setae %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_ucomile_ss: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomiss %xmm0, %xmm1 ; X64-NEXT: setae %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ret i32 %res @@ -2151,16 +2151,16 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_mm_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-LABEL: test_mm_ucomilt_ss: ; X32: # BB#0: 
-; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ucomiss %xmm0, %xmm1 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_ucomilt_ss: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomiss %xmm0, %xmm1 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ret i32 %res diff --git a/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll index 1df432185701..c346064e7aa8 100644 --- a/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse-intrinsics-x86.ll @@ -77,16 +77,16 @@ declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comige_ss(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_x86_sse_comige_ss: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: comiss %xmm1, %xmm0 ; SSE-NEXT: setae %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse_comige_ss: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vcomiss %xmm1, %xmm0 ; KNL-NEXT: setae %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -97,16 +97,16 @@ declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comigt_ss(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_x86_sse_comigt_ss: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: comiss %xmm1, %xmm0 ; SSE-NEXT: seta %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse_comigt_ss: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vcomiss %xmm1, %xmm0 ; KNL-NEXT: seta %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -117,16 +117,16 @@ declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x 
float>) nounwind readnone define i32 @test_x86_sse_comile_ss(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_x86_sse_comile_ss: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: comiss %xmm0, %xmm1 ; SSE-NEXT: setae %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse_comile_ss: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vcomiss %xmm0, %xmm1 ; KNL-NEXT: setae %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -137,16 +137,16 @@ declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_comilt_ss(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_x86_sse_comilt_ss: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: comiss %xmm0, %xmm1 ; SSE-NEXT: seta %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse_comilt_ss: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vcomiss %xmm0, %xmm1 ; KNL-NEXT: seta %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -517,16 +517,16 @@ declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomige_ss(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_x86_sse_ucomige_ss: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomiss %xmm1, %xmm0 ; SSE-NEXT: setae %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse_ucomige_ss: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vucomiss %xmm1, %xmm0 ; KNL-NEXT: setae %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -537,16 +537,16 @@ declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomigt_ss(<4 x 
float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_x86_sse_ucomigt_ss: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomiss %xmm1, %xmm0 ; SSE-NEXT: seta %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse_ucomigt_ss: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vucomiss %xmm1, %xmm0 ; KNL-NEXT: seta %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -557,16 +557,16 @@ declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomile_ss(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_x86_sse_ucomile_ss: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomiss %xmm0, %xmm1 ; SSE-NEXT: setae %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse_ucomile_ss: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vucomiss %xmm0, %xmm1 ; KNL-NEXT: setae %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res @@ -577,16 +577,16 @@ declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone define i32 @test_x86_sse_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_x86_sse_ucomilt_ss: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomiss %xmm0, %xmm1 ; SSE-NEXT: seta %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse_ucomilt_ss: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vucomiss %xmm0, %xmm1 ; KNL-NEXT: seta %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) ; [#uses=1] ret i32 %res diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll index fa71325d7d6e..57a1953f1633 100644 --- 
a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll @@ -983,16 +983,16 @@ declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; X32-LABEL: test_mm_comige_sd: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: comisd %xmm1, %xmm0 ; X32-NEXT: setae %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_comige_sd: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: comisd %xmm1, %xmm0 ; X64-NEXT: setae %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res @@ -1002,16 +1002,16 @@ declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; X32-LABEL: test_mm_comigt_sd: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: comisd %xmm1, %xmm0 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_comigt_sd: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: comisd %xmm1, %xmm0 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res @@ -1021,16 +1021,16 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; X32-LABEL: test_mm_comile_sd: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: comisd %xmm0, %xmm1 ; X32-NEXT: setae %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_comile_sd: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: comisd %xmm0, %xmm1 ; X64-NEXT: setae %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res @@ 
-1040,16 +1040,16 @@ declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; X32-LABEL: test_mm_comilt_sd: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: comisd %xmm0, %xmm1 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_comilt_sd: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: comisd %xmm0, %xmm1 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res @@ -3538,16 +3538,16 @@ declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; X32-LABEL: test_mm_ucomige_sd: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ucomisd %xmm1, %xmm0 ; X32-NEXT: setae %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_ucomige_sd: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomisd %xmm1, %xmm0 ; X64-NEXT: setae %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res @@ -3557,16 +3557,16 @@ declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; X32-LABEL: test_mm_ucomigt_sd: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ucomisd %xmm1, %xmm0 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_ucomigt_sd: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomisd %xmm1, %xmm0 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res @@ -3576,16 +3576,16 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readn 
define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; X32-LABEL: test_mm_ucomile_sd: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ucomisd %xmm0, %xmm1 ; X32-NEXT: setae %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_ucomile_sd: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomisd %xmm0, %xmm1 ; X64-NEXT: setae %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res @@ -3595,16 +3595,16 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; X32-LABEL: test_mm_ucomilt_sd: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ucomisd %xmm0, %xmm1 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_ucomilt_sd: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ucomisd %xmm0, %xmm1 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index 617e30e4b92c..9b595fc44cb7 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -77,16 +77,16 @@ declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comige_sd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_comige_sd: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: comisd %xmm1, %xmm0 ; SSE-NEXT: setae %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse2_comige_sd: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vcomisd %xmm1, %xmm0 ; KNL-NEXT: setae %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.comige.sd(<2 x 
double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -97,16 +97,16 @@ declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comigt_sd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_comigt_sd: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: comisd %xmm1, %xmm0 ; SSE-NEXT: seta %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse2_comigt_sd: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vcomisd %xmm1, %xmm0 ; KNL-NEXT: seta %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -117,16 +117,16 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comile_sd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_comile_sd: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: comisd %xmm0, %xmm1 ; SSE-NEXT: setae %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse2_comile_sd: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vcomisd %xmm0, %xmm1 ; KNL-NEXT: setae %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -137,16 +137,16 @@ declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_x86_sse2_comilt_sd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_comilt_sd: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: comisd %xmm0, %xmm1 ; SSE-NEXT: seta %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse2_comilt_sd: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vcomisd %xmm0, %xmm1 ; KNL-NEXT: seta %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res 
@@ -1152,16 +1152,16 @@ declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomige_sd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_ucomige_sd: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomisd %xmm1, %xmm0 ; SSE-NEXT: setae %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse2_ucomige_sd: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vucomisd %xmm1, %xmm0 ; KNL-NEXT: setae %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -1172,16 +1172,16 @@ declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_ucomigt_sd: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomisd %xmm1, %xmm0 ; SSE-NEXT: seta %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse2_ucomigt_sd: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vucomisd %xmm1, %xmm0 ; KNL-NEXT: seta %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -1192,16 +1192,16 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomile_sd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_ucomile_sd: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomisd %xmm0, %xmm1 ; SSE-NEXT: setae %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse2_ucomile_sd: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vucomisd %xmm0, %xmm1 ; KNL-NEXT: setae %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res @@ -1212,16 +1212,16 @@ 
declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readn define i32 @test_x86_sse2_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_ucomilt_sd: ; SSE: ## BB#0: -; SSE-NEXT: xorl %eax, %eax ; SSE-NEXT: ucomisd %xmm0, %xmm1 ; SSE-NEXT: seta %al +; SSE-NEXT: movzbl %al, %eax ; SSE-NEXT: retl ; ; KNL-LABEL: test_x86_sse2_ucomilt_sd: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vucomisd %xmm0, %xmm1 ; KNL-NEXT: seta %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1) ; [#uses=1] ret i32 %res diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll index 16868d854df7..f1e01db59607 100644 --- a/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse41-intrinsics-fast-isel.ll @@ -918,16 +918,16 @@ declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone define i32 @test_mm_test_all_zeros(<2 x i64> %a0, <2 x i64> %a1) { ; X32-LABEL: test_mm_test_all_zeros: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ptest %xmm1, %xmm0 ; X32-NEXT: sete %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_test_all_zeros: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ptest %xmm1, %xmm0 ; X64-NEXT: sete %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ret i32 %res @@ -937,16 +937,16 @@ declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone define i32 @test_mm_test_mix_ones_zeros(<2 x i64> %a0, <2 x i64> %a1) { ; X32-LABEL: test_mm_test_mix_ones_zeros: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ptest %xmm1, %xmm0 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_test_mix_ones_zeros: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ptest %xmm1, %xmm0 ; X64-NEXT: 
seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ret i32 %res @@ -974,16 +974,16 @@ define i32 @test_mm_testc_si128(<2 x i64> %a0, <2 x i64> %a1) { define i32 @test_mm_testnzc_si128(<2 x i64> %a0, <2 x i64> %a1) { ; X32-LABEL: test_mm_testnzc_si128: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ptest %xmm1, %xmm0 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_testnzc_si128: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ptest %xmm1, %xmm0 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ret i32 %res @@ -992,16 +992,16 @@ define i32 @test_mm_testnzc_si128(<2 x i64> %a0, <2 x i64> %a1) { define i32 @test_mm_testz_si128(<2 x i64> %a0, <2 x i64> %a1) { ; X32-LABEL: test_mm_testz_si128: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ptest %xmm1, %xmm0 ; X32-NEXT: sete %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_testz_si128: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ptest %xmm1, %xmm0 ; X64-NEXT: sete %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ret i32 %res diff --git a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll index 58eae1057f89..b8d058cc12ed 100644 --- a/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse41-intrinsics-x86.ll @@ -323,16 +323,16 @@ declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) { ; SSE41-LABEL: test_x86_sse41_ptestnzc: ; SSE41: ## BB#0: -; SSE41-NEXT: xorl %eax, %eax ; SSE41-NEXT: ptest %xmm1, %xmm0 ; SSE41-NEXT: seta %al +; SSE41-NEXT: movzbl %al, %eax ; SSE41-NEXT: retl ; ; KNL-LABEL: test_x86_sse41_ptestnzc: ; KNL: ## BB#0: -; 
KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vptest %xmm1, %xmm0 ; KNL-NEXT: seta %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] ret i32 %res @@ -343,16 +343,16 @@ declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) { ; SSE41-LABEL: test_x86_sse41_ptestz: ; SSE41: ## BB#0: -; SSE41-NEXT: xorl %eax, %eax ; SSE41-NEXT: ptest %xmm1, %xmm0 ; SSE41-NEXT: sete %al +; SSE41-NEXT: movzbl %al, %eax ; SSE41-NEXT: retl ; ; KNL-LABEL: test_x86_sse41_ptestz: ; KNL: ## BB#0: -; KNL-NEXT: xorl %eax, %eax ; KNL-NEXT: vptest %xmm1, %xmm0 ; KNL-NEXT: sete %al +; KNL-NEXT: movzbl %al, %eax ; KNL-NEXT: retl %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; [#uses=1] ret i32 %res diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll index 3cb754c8f93f..ffd58eb64263 100644 --- a/llvm/test/CodeGen/X86/sse41.ll +++ b/llvm/test/CodeGen/X86/sse41.ll @@ -208,16 +208,16 @@ define <4 x float> @blendps_not_insertps_2(<4 x float> %t1, <4 x float> %t2) nou define i32 @ptestz_1(<2 x i64> %t1, <2 x i64> %t2) nounwind { ; X32-LABEL: ptestz_1: ; X32: ## BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ptest %xmm1, %xmm0 ; X32-NEXT: sete %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: ptestz_1: ; X64: ## BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ptest %xmm1, %xmm0 ; X64-NEXT: sete %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %tmp1 = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone ret i32 %tmp1 @@ -244,16 +244,16 @@ define i32 @ptestz_2(<2 x i64> %t1, <2 x i64> %t2) nounwind { define i32 @ptestz_3(<2 x i64> %t1, <2 x i64> %t2) nounwind { ; X32-LABEL: ptestz_3: ; X32: ## BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: ptest %xmm1, %xmm0 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: ptestz_3: ; X64: ## 
BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: ptest %xmm1, %xmm0 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %tmp1 = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %t1, <2 x i64> %t2) nounwind readnone ret i32 %tmp1 diff --git a/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll index 53b94e7f0d39..926ace809176 100644 --- a/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse42-intrinsics-fast-isel.ll @@ -4,27 +4,23 @@ ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c -define i32 @test_mm_cmpestra(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind { +define i32 @test_mm_cmpestra(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) { ; X32-LABEL: test_mm_cmpestra: ; X32: # BB#0: -; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X32-NEXT: seta %bl -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: popl %ebx +; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_cmpestra: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d ; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: seta %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -103,27 +99,23 @@ define <2 x i64> @test_mm_cmpestrm(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a } declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone -define i32 @test_mm_cmpestro(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind { +define i32 @test_mm_cmpestro(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) { ; X32-LABEL: test_mm_cmpestro: ; X32: # BB#0: -; X32-NEXT: pushl %ebx ; X32-NEXT: movl 
{{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X32-NEXT: seto %bl -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: popl %ebx +; X32-NEXT: seto %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_cmpestro: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d ; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: seto %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: seto %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -132,27 +124,23 @@ define i32 @test_mm_cmpestro(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nou } declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone -define i32 @test_mm_cmpestrs(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind { +define i32 @test_mm_cmpestrs(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) { ; X32-LABEL: test_mm_cmpestrs: ; X32: # BB#0: -; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X32-NEXT: sets %bl -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: popl %ebx +; X32-NEXT: sets %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_cmpestrs: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d ; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: sets %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: sets %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -161,27 +149,23 @@ define i32 @test_mm_cmpestrs(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nou } declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone -define i32 @test_mm_cmpestrz(<2 x 
i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) nounwind { +define i32 @test_mm_cmpestrz(<2 x i64> %a0, i32 %a1, <2 x i64> %a2, i32 %a3) { ; X32-LABEL: test_mm_cmpestrz: ; X32: # BB#0: -; X32-NEXT: pushl %ebx ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: xorl %ebx, %ebx ; X32-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X32-NEXT: sete %bl -; X32-NEXT: movl %ebx, %eax -; X32-NEXT: popl %ebx +; X32-NEXT: sete %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_cmpestrz: ; X64: # BB#0: -; X64-NEXT: xorl %r8d, %r8d ; X64-NEXT: movl %edi, %eax ; X64-NEXT: movl %esi, %edx ; X64-NEXT: pcmpestri $7, %xmm1, %xmm0 -; X64-NEXT: sete %r8b -; X64-NEXT: movl %r8d, %eax +; X64-NEXT: sete %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg2 = bitcast <2 x i64> %a2 to <16 x i8> @@ -208,16 +192,16 @@ define <2 x i64> @test_mm_cmpgt_epi64(<2 x i64> %a0, <2 x i64> %a1) { define i32 @test_mm_cmpistra(<2 x i64> %a0, <2 x i64> %a1) { ; X32-LABEL: test_mm_cmpistra: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: pcmpistri $7, %xmm1, %xmm0 ; X32-NEXT: seta %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_cmpistra: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: pcmpistri $7, %xmm1, %xmm0 ; X64-NEXT: seta %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> @@ -287,16 +271,16 @@ declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwin define i32 @test_mm_cmpistro(<2 x i64> %a0, <2 x i64> %a1) { ; X32-LABEL: test_mm_cmpistro: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: pcmpistri $7, %xmm1, %xmm0 ; X32-NEXT: seto %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_cmpistro: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: pcmpistri $7, %xmm1, %xmm0 ; X64-NEXT: seto %al +; X64-NEXT: movzbl %al, %eax ; 
X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> @@ -308,16 +292,16 @@ declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_mm_cmpistrs(<2 x i64> %a0, <2 x i64> %a1) { ; X32-LABEL: test_mm_cmpistrs: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: pcmpistri $7, %xmm1, %xmm0 ; X32-NEXT: sets %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_cmpistrs: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: pcmpistri $7, %xmm1, %xmm0 ; X64-NEXT: sets %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> @@ -329,16 +313,16 @@ declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_mm_cmpistrz(<2 x i64> %a0, <2 x i64> %a1) { ; X32-LABEL: test_mm_cmpistrz: ; X32: # BB#0: -; X32-NEXT: xorl %eax, %eax ; X32-NEXT: pcmpistri $7, %xmm1, %xmm0 ; X32-NEXT: sete %al +; X32-NEXT: movzbl %al, %eax ; X32-NEXT: retl ; ; X64-LABEL: test_mm_cmpistrz: ; X64: # BB#0: -; X64-NEXT: xorl %eax, %eax ; X64-NEXT: pcmpistri $7, %xmm1, %xmm0 ; X64-NEXT: sete %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: retq %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> diff --git a/llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll index 2b31109ce45c..d1f4e58c714b 100644 --- a/llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse42-intrinsics-x86.ll @@ -33,17 +33,14 @@ define i32 @test_x86_sse42_pcmpestri128_load(<16 x i8>* %a0, <16 x i8>* %a2) { } -define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) nounwind { +define i32 @test_x86_sse42_pcmpestria128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK-LABEL: test_x86_sse42_pcmpestria128: ; CHECK: ## BB#0: -; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: movl $7, %eax ; CHECK-NEXT: movl $7, %edx -; 
CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: pcmpestri $7, %xmm1, %xmm0 -; CHECK-NEXT: seta %bl -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: popl %ebx +; CHECK-NEXT: seta %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpestria128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; [#uses=1] ret i32 %res @@ -66,17 +63,14 @@ define i32 @test_x86_sse42_pcmpestric128(<16 x i8> %a0, <16 x i8> %a2) { declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone -define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) nounwind { +define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK-LABEL: test_x86_sse42_pcmpestrio128: ; CHECK: ## BB#0: -; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: movl $7, %eax ; CHECK-NEXT: movl $7, %edx -; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: pcmpestri $7, %xmm1, %xmm0 -; CHECK-NEXT: seto %bl -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: popl %ebx +; CHECK-NEXT: seto %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; [#uses=1] ret i32 %res @@ -84,17 +78,14 @@ define i32 @test_x86_sse42_pcmpestrio128(<16 x i8> %a0, <16 x i8> %a2) nounwind declare i32 @llvm.x86.sse42.pcmpestrio128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone -define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) nounwind { +define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK-LABEL: test_x86_sse42_pcmpestris128: ; CHECK: ## BB#0: -; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: movl $7, %eax ; CHECK-NEXT: movl $7, %edx -; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: pcmpestri $7, %xmm1, %xmm0 -; CHECK-NEXT: sets %bl -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: popl %ebx +; CHECK-NEXT: sets %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpestris128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, 
i8 7) ; [#uses=1] ret i32 %res @@ -102,17 +93,14 @@ define i32 @test_x86_sse42_pcmpestris128(<16 x i8> %a0, <16 x i8> %a2) nounwind declare i32 @llvm.x86.sse42.pcmpestris128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone -define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) nounwind { +define i32 @test_x86_sse42_pcmpestriz128(<16 x i8> %a0, <16 x i8> %a2) { ; CHECK-LABEL: test_x86_sse42_pcmpestriz128: ; CHECK: ## BB#0: -; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: movl $7, %eax ; CHECK-NEXT: movl $7, %edx -; CHECK-NEXT: xorl %ebx, %ebx ; CHECK-NEXT: pcmpestri $7, %xmm1, %xmm0 -; CHECK-NEXT: sete %bl -; CHECK-NEXT: movl %ebx, %eax -; CHECK-NEXT: popl %ebx +; CHECK-NEXT: sete %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpestriz128(<16 x i8> %a0, i32 7, <16 x i8> %a2, i32 7, i8 7) ; [#uses=1] ret i32 %res @@ -178,9 +166,9 @@ define i32 @test_x86_sse42_pcmpistri128_load(<16 x i8>* %a0, <16 x i8>* %a1) { define i32 @test_x86_sse42_pcmpistria128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: test_x86_sse42_pcmpistria128: ; CHECK: ## BB#0: -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: pcmpistri $7, %xmm1, %xmm0 ; CHECK-NEXT: seta %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpistria128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; [#uses=1] ret i32 %res @@ -204,9 +192,9 @@ declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_x86_sse42_pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: test_x86_sse42_pcmpistrio128: ; CHECK: ## BB#0: -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: pcmpistri $7, %xmm1, %xmm0 ; CHECK-NEXT: seto %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; [#uses=1] ret i32 %res @@ -217,9 +205,9 @@ declare i32 @llvm.x86.sse42.pcmpistrio128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 
@test_x86_sse42_pcmpistris128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: test_x86_sse42_pcmpistris128: ; CHECK: ## BB#0: -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: pcmpistri $7, %xmm1, %xmm0 ; CHECK-NEXT: sets %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpistris128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; [#uses=1] ret i32 %res @@ -230,9 +218,9 @@ declare i32 @llvm.x86.sse42.pcmpistris128(<16 x i8>, <16 x i8>, i8) nounwind rea define i32 @test_x86_sse42_pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1) { ; CHECK-LABEL: test_x86_sse42_pcmpistriz128: ; CHECK: ## BB#0: -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: pcmpistri $7, %xmm1, %xmm0 ; CHECK-NEXT: sete %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: retl %res = call i32 @llvm.x86.sse42.pcmpistriz128(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; [#uses=1] ret i32 %res