[X86] Bring back the MOV64r0 pseudo instruction

This patch brings back the MOV64r0 pseudo instruction for zeroing a 64-bit register, replacing the SUBREG_TO_REG+MOV32r0 sequence we use today. Post register allocation we rewrite the MOV64r0 to a 32-bit xor with an implicit def of the 64-bit register, similar to what we do for the various XMM/YMM/ZMM zeroing pseudos.
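
As a sketch only (the virtual register names and the choice of rax are hypothetical), selecting a 64-bit zero now produces a single pseudo instead of a MOV32r0 wrapped in SUBREG_TO_REG, and expandPostRAPseudo lowers it to the usual 32-bit xor idiom:

    # Old selection (illustrative): %t:gr32 = MOV32r0
    #                               %z:gr64 = SUBREG_TO_REG 0, %t, sub_32bit
    # New selection (illustrative): %z:gr64 = MOV64r0
    # After register allocation, assuming %z was assigned rax, the pseudo is
    # expanded to a 32-bit xor that implicitly defines the full 64-bit
    # register (writing eax zeroes the upper 32 bits of rax anyway):
    xorl %eax, %eax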

My main motivation is to enable the spill optimization in foldMemoryOperandImpl: we were seeing code that repeatedly did "xor eax, eax; store eax" to spill several registers, with a new xor for each store. With this optimization enabled we get a store of a 0 immediate instead of an xor, though I admit the ideal solution would be a single xor shared by the multiple spills. I don't believe we have a test case in here that shows this optimization; I'll see if I can reduce one from the code we're looking at.
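
For example (hypothetical registers and stack offsets, since there is no reduced test case yet), spilling several zeroed 64-bit values previously rematerialized an xor before each store, whereas folding the MOV64r0 into the store emits a zero immediate:

    # Before (one rematerialized xor per spill):
    xorl %eax, %eax
    movq %rax, 16(%rsp)
    xorl %eax, %eax
    movq %rax, 24(%rsp)
    # After (MOV64r0 folded into the store as MOV64mi32):
    movq $0, 16(%rsp)
    movq $0, 24(%rsp)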

There are definitely some other machine CSE (and maybe other passes) behavior changes exposed by this patch, so it seems like there might be some other deficiencies in SUBREG_TO_REG handling.

Differential Revision: https://reviews.llvm.org/D52757

llvm-svn: 345165
Craig Topper 2018-10-24 17:32:09 +00:00
parent 2cce074e8c
commit 2417273255
17 changed files with 531 additions and 480 deletions

View File

@ -1916,8 +1916,8 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
{ &X86::GR64RegClass, X86::RAX, X86::RDX, {
{ X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv
{ X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem
{ X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv
{ X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem
{ X86::DIV64r, X86::MOV64r0, Copy, X86::RAX, U }, // UDiv
{ X86::DIV64r, X86::MOV64r0, Copy, X86::RDX, U }, // URem
}
}, // i64
};
@ -1964,26 +1964,22 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(OpEntry.OpSignExtend));
else {
unsigned Zero32 = createResultReg(&X86::GR32RegClass);
unsigned ZeroReg = createResultReg(VT == MVT::i64 ? &X86::GR64RegClass
: &X86::GR32RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(X86::MOV32r0), Zero32);
TII.get(OpEntry.OpSignExtend), ZeroReg);
// Copy the zero into the appropriate sub/super/identical physical
// register. Unfortunately the operations needed are not uniform enough
// to fit neatly into the table above.
if (VT == MVT::i16) {
if (VT == MVT::i16)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Copy), TypeEntry.HighInReg)
.addReg(Zero32, 0, X86::sub_16bit);
} else if (VT == MVT::i32) {
.addReg(ZeroReg, 0, X86::sub_16bit);
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Copy), TypeEntry.HighInReg)
.addReg(Zero32);
} else if (VT == MVT::i64) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
.addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
}
.addReg(ZeroReg);
}
}
// Generate the DIV/IDIV instruction.
@ -3708,6 +3704,9 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
uint64_t Imm = CI->getZExtValue();
if (Imm == 0) {
if (VT.SimpleTy == MVT::i64)
return fastEmitInst_(X86::MOV64r0, &X86::GR64RegClass);
unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
@ -3720,13 +3719,6 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
X86::sub_16bit);
case MVT::i32:
return SrcReg;
case MVT::i64: {
unsigned ResultReg = createResultReg(&X86::GR64RegClass);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
.addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
return ResultReg;
}
}
}

View File

@ -3569,7 +3569,10 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
} else {
// Zero out the high part, effectively zero extending the input.
SDValue ClrNode = SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
unsigned ClrOpc = NVT.SimpleTy == MVT::i64 ? X86::MOV64r0
: X86::MOV32r0;
MVT ClrVT = NVT.SimpleTy == MVT::i64 ? MVT::i64 : MVT::i32;
SDValue ClrNode = SDValue(CurDAG->getMachineNode(ClrOpc, dl, ClrVT), 0);
switch (NVT.SimpleTy) {
case MVT::i16:
ClrNode =
@ -3580,15 +3583,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
0);
break;
case MVT::i32:
break;
case MVT::i64:
ClrNode =
SDValue(CurDAG->getMachineNode(
TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
CurDAG->getTargetConstant(X86::sub_32bit, dl,
MVT::i32)),
0);
break;
default:
llvm_unreachable("Unexpected division source");

View File

@ -275,16 +275,18 @@ def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), "", []>;
// Alias instruction mapping movr0 to xor.
// FIXME: remove when we can teach regalloc that xor reg, reg is ok.
let Defs = [EFLAGS], isReMaterializable = 1, isAsCheapAsAMove = 1,
isPseudo = 1, AddedComplexity = 10 in
isPseudo = 1, AddedComplexity = 10 in {
def MOV32r0 : I<0, Pseudo, (outs GR32:$dst), (ins), "",
[(set GR32:$dst, 0)]>, Sched<[WriteZero]>;
def MOV64r0 : I<0, Pseudo, (outs GR64:$dst), (ins), "",
[(set GR64:$dst, 0)]>, Sched<[WriteZero]>;
}
// Other widths can also make use of the 32-bit xor, which may have a smaller
// encoding and avoid partial register updates.
let AddedComplexity = 10 in {
def : Pat<(i8 0), (EXTRACT_SUBREG (MOV32r0), sub_8bit)>;
def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>;
}
let Predicates = [OptForSize, Not64BitMode],

View File

@ -683,8 +683,10 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
if (ClobbersEFLAGS && !isSafeToClobberEFLAGS(MBB, I)) {
// The instruction clobbers EFLAGS. Re-materialize as MOV32ri to avoid side
// effects.
unsigned NewOpc = X86::MOV32ri;
int Value;
switch (Orig.getOpcode()) {
case X86::MOV64r0: NewOpc = X86::MOV32ri64; Value = 0; break;
case X86::MOV32r0: Value = 0; break;
case X86::MOV32r1: Value = 1; break;
case X86::MOV32r_1: Value = -1; break;
@ -693,7 +695,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
}
const DebugLoc &DL = Orig.getDebugLoc();
BuildMI(MBB, I, DL, get(X86::MOV32ri))
BuildMI(MBB, I, DL, get(NewOpc))
.add(Orig.getOperand(0))
.addImm(Value);
} else {
@ -3750,7 +3752,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// MOV32r0 etc. are implemented with xor which clobbers condition code.
// They are safe to move up, if the definition to EFLAGS is dead and
// earlier instructions do not read or write EFLAGS.
if (!Movr0Inst && Instr.getOpcode() == X86::MOV32r0 &&
if (!Movr0Inst &&
(Instr.getOpcode() == X86::MOV32r0 ||
Instr.getOpcode() == X86::MOV64r0) &&
Instr.registerDefIsDead(X86::EFLAGS, TRI)) {
Movr0Inst = &Instr;
continue;
@ -4155,6 +4159,15 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
switch (MI.getOpcode()) {
case X86::MOV32r0:
return Expand2AddrUndef(MIB, get(X86::XOR32rr));
case X86::MOV64r0: {
const TargetRegisterInfo *TRI = &getRegisterInfo();
unsigned Reg = MIB->getOperand(0).getReg();
unsigned Reg32 = TRI->getSubReg(Reg, X86::sub_32bit);
MIB->getOperand(0).setReg(Reg32);
Expand2AddrUndef(MIB, get(X86::XOR32rr));
MIB.addReg(Reg, RegState::ImplicitDefine);
return true;
}
case X86::MOV32r1:
return expandMOV32r1(MIB, *this, /*MinusOne=*/ false);
case X86::MOV32r_1:
@ -4898,8 +4911,10 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
isTwoAddrFold = true;
} else {
if (OpNum == 0) {
if (MI.getOpcode() == X86::MOV32r0) {
NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, InsertPt, MI);
if (MI.getOpcode() == X86::MOV32r0 || MI.getOpcode() == X86::MOV64r0) {
unsigned NewOpc = MI.getOpcode() == X86::MOV64r0 ? X86::MOV64mi32
: X86::MOV32mi;
NewMI = MakeM0Inst(*this, NewOpc, MOs, InsertPt, MI);
if (NewMI)
return NewMI;
}

View File

@ -487,20 +487,14 @@ bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
// Otherwise, just build the predicate state itself by zeroing a register
// as we don't need any initial state.
PS->InitialReg = MRI->createVirtualRegister(PS->RC);
unsigned PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
PredStateSubReg);
auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64r0),
PS->InitialReg);
++NumInstsInserted;
MachineOperand *ZeroEFLAGSDefOp =
ZeroI->findRegisterDefOperand(X86::EFLAGS);
assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
"Must have an implicit def of EFLAGS!");
ZeroEFLAGSDefOp->setIsDead(true);
BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
PS->InitialReg)
.addImm(0)
.addReg(PredStateSubReg)
.addImm(X86::sub_32bit);
}
// We're going to need to trace predicate state throughout the function's

View File

@ -54,7 +54,7 @@ define i64 @const_i64_i32() {
define void @main(i32 ** %data) {
; ALL-LABEL: main:
; ALL: # %bb.0:
; ALL-NEXT: movq $0, %rax
; ALL-NEXT: xorl %eax, %eax
; ALL-NEXT: movq %rax, (%rdi)
; ALL-NEXT: retq
store i32* null, i32** %data, align 8

View File

@ -2141,7 +2141,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX1-NEXT: pushq %r13
; AVX1-NEXT: pushq %r12
; AVX1-NEXT: pushq %rbx
; AVX1-NEXT: subq $24, %rsp
; AVX1-NEXT: subq $16, %rsp
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
@ -2152,12 +2152,12 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX1-NEXT: vmovq %xmm5, %rbp
; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm4[2,3,0,1]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm4[0],zero,xmm4[1],zero
; AVX1-NEXT: vpextrq $1, %xmm4, %rsi
; AVX1-NEXT: vmovq %xmm4, %rcx
; AVX1-NEXT: vpextrq $1, %xmm4, %rcx
; AVX1-NEXT: vmovq %xmm4, %rsi
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero
; AVX1-NEXT: vpextrq $1, %xmm4, %r8
; AVX1-NEXT: vpextrq $1, %xmm4, %r10
; AVX1-NEXT: vmovq %xmm4, %r11
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
@ -2166,7 +2166,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero
; AVX1-NEXT: vpextrq $1, %xmm4, %r15
; AVX1-NEXT: vmovq %xmm4, %rdi
; AVX1-NEXT: vmovq %xmm4, %rdx
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX1-NEXT: vpextrq $1, %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
@ -2175,27 +2175,28 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpextrq $1, %xmm3, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX1-NEXT: vmovq %xmm3, %r10
; AVX1-NEXT: vmovq %xmm3, %r9
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero
; AVX1-NEXT: vpextrq $1, %xmm4, %rdx
; AVX1-NEXT: addq %rbx, %rdx
; AVX1-NEXT: vmovq %xmm4, %r9
; AVX1-NEXT: addq %rbp, %r9
; AVX1-NEXT: vpextrq $1, %xmm4, %r14
; AVX1-NEXT: addq %rbx, %r14
; AVX1-NEXT: vmovq %xmm4, %r8
; AVX1-NEXT: addq %rbp, %r8
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,0,1]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero
; AVX1-NEXT: vpextrq $1, %xmm3, %rax
; AVX1-NEXT: vpextrq $1, %xmm3, %rdi
; AVX1-NEXT: addq %rcx, %rdi
; AVX1-NEXT: vmovq %xmm3, %rax
; AVX1-NEXT: addq %rsi, %rax
; AVX1-NEXT: movq %rax, %r14
; AVX1-NEXT: vmovq %xmm3, %rbp
; AVX1-NEXT: addq %rcx, %rbp
; AVX1-NEXT: movq %rax, %rsi
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
; AVX1-NEXT: vpextrq $1, %xmm3, %rsi
; AVX1-NEXT: addq %r8, %rsi
; AVX1-NEXT: vpextrq $1, %xmm3, %rax
; AVX1-NEXT: addq %r10, %rax
; AVX1-NEXT: movq %rax, %r10
; AVX1-NEXT: vmovq %xmm3, %rax
; AVX1-NEXT: addq %r11, %rax
; AVX1-NEXT: movq %rax, %r11
@ -2203,17 +2204,17 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
; AVX1-NEXT: addq %r13, %rax
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: movq %rax, %rbx
; AVX1-NEXT: vmovq %xmm2, %rax
; AVX1-NEXT: addq %r12, %rax
; AVX1-NEXT: movq %rax, %r8
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero
; AVX1-NEXT: vpextrq $1, %xmm3, %rax
; AVX1-NEXT: addq %r15, %rax
; AVX1-NEXT: movq %rax, %rbx
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: vmovq %xmm3, %rax
; AVX1-NEXT: addq %rdi, %rax
; AVX1-NEXT: addq %rdx, %rax
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero
@ -2226,36 +2227,30 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
; AVX1-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: vmovq %xmm2, %r12
; AVX1-NEXT: addq %r10, %r12
; AVX1-NEXT: vpextrq $1, %xmm2, %rbp
; AVX1-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; AVX1-NEXT: vmovq %xmm2, %r15
; AVX1-NEXT: addq %r9, %r15
; AVX1-NEXT: vpextrq $1, %xmm1, %rax
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
; AVX1-NEXT: vpextrq $1, %xmm0, %r10
; AVX1-NEXT: addq %rax, %r10
; AVX1-NEXT: vmovq %xmm1, %rax
; AVX1-NEXT: vmovq %xmm0, %rdi
; AVX1-NEXT: addq %rax, %rdi
; AVX1-NEXT: addq $-1, %rdx
; AVX1-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: movl $0, %eax
; AVX1-NEXT: adcq $-1, %rax
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: addq $-1, %r9
; AVX1-NEXT: movq %r9, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: movl $0, %eax
; AVX1-NEXT: adcq $-1, %rax
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: vpextrq $1, %xmm0, %r9
; AVX1-NEXT: addq %rax, %r9
; AVX1-NEXT: vmovq %xmm1, %rcx
; AVX1-NEXT: vmovq %xmm0, %rdx
; AVX1-NEXT: addq %rcx, %rdx
; AVX1-NEXT: addq $-1, %r14
; AVX1-NEXT: movq %r14, (%rsp) # 8-byte Spill
; AVX1-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: movl $0, %eax
; AVX1-NEXT: adcq $-1, %rax
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: addq $-1, %rbp
; AVX1-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: addq $-1, %r8
; AVX1-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: movl $0, %eax
; AVX1-NEXT: adcq $-1, %rax
; AVX1-NEXT: movq %rax, (%rsp) # 8-byte Spill
; AVX1-NEXT: addq $-1, %rdi
; AVX1-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: movl $0, %eax
; AVX1-NEXT: adcq $-1, %rax
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@ -2264,98 +2259,100 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX1-NEXT: movl $0, %eax
; AVX1-NEXT: adcq $-1, %rax
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: addq $-1, %r10
; AVX1-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: movl $0, %eax
; AVX1-NEXT: adcq $-1, %rax
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: addq $-1, %r11
; AVX1-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: movl $0, %eax
; AVX1-NEXT: adcq $-1, %rax
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: addq $-1, %rcx
; AVX1-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: movl $0, %ebp
; AVX1-NEXT: adcq $-1, %rbp
; AVX1-NEXT: addq $-1, %r8
; AVX1-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: movl $0, %r15d
; AVX1-NEXT: adcq $-1, %r15
; AVX1-NEXT: addq $-1, %rbx
; AVX1-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: movl $0, %eax
; AVX1-NEXT: adcq $-1, %rax
; AVX1-NEXT: movq %rax, %rsi
; AVX1-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX1-NEXT: movl $0, %r12d
; AVX1-NEXT: adcq $-1, %r12
; AVX1-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX1-NEXT: movl $0, %r13d
; AVX1-NEXT: adcq $-1, %r13
; AVX1-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX1-NEXT: movl $0, %r14d
; AVX1-NEXT: adcq $-1, %r14
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX1-NEXT: addq $-1, %rdx
; AVX1-NEXT: movl $0, %r11d
; AVX1-NEXT: adcq $-1, %r11
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX1-NEXT: addq $-1, %rax
; AVX1-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX1-NEXT: movl $0, %ebx
; AVX1-NEXT: adcq $-1, %rbx
; AVX1-NEXT: addq $-1, %r12
; AVX1-NEXT: movl $0, %r9d
; AVX1-NEXT: adcq $-1, %r9
; AVX1-NEXT: addq $-1, %r10
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX1-NEXT: addq $-1, %rcx
; AVX1-NEXT: movl $0, %r11d
; AVX1-NEXT: adcq $-1, %r11
; AVX1-NEXT: addq $-1, %rbp
; AVX1-NEXT: movl $0, %r10d
; AVX1-NEXT: adcq $-1, %r10
; AVX1-NEXT: addq $-1, %r15
; AVX1-NEXT: movl $0, %r8d
; AVX1-NEXT: adcq $-1, %r8
; AVX1-NEXT: addq $-1, %rdi
; AVX1-NEXT: movl $0, %ecx
; AVX1-NEXT: adcq $-1, %rcx
; AVX1-NEXT: shldq $63, %rdi, %rcx
; AVX1-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: shldq $63, %r10, %r8
; AVX1-NEXT: shldq $63, %r12, %r9
; AVX1-NEXT: shldq $63, %rax, %rbx
; AVX1-NEXT: shldq $63, %rdx, %r11
; AVX1-NEXT: addq $-1, %r9
; AVX1-NEXT: movl $0, %edi
; AVX1-NEXT: adcq $-1, %rdi
; AVX1-NEXT: addq $-1, %rdx
; AVX1-NEXT: movl $0, %eax
; AVX1-NEXT: adcq $-1, %rax
; AVX1-NEXT: shldq $63, %rdx, %rax
; AVX1-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: shldq $63, %r9, %rdi
; AVX1-NEXT: shldq $63, %r15, %r8
; AVX1-NEXT: shldq $63, %rbp, %r10
; AVX1-NEXT: shldq $63, %rcx, %r11
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX1-NEXT: shldq $63, %rdx, %rbx
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX1-NEXT: shldq $63, %rdx, %r14
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX1-NEXT: shldq $63, %rdx, %r13
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX1-NEXT: shldq $63, %rdx, %r12
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX1-NEXT: shldq $63, %rax, %rsi
; AVX1-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX1-NEXT: shldq $63, %rax, %r15
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX1-NEXT: shldq $63, %rax, %rbp
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX1-NEXT: shldq $63, %rax, %rsi
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Reload
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX1-NEXT: shldq $63, %rax, %r15
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX1-NEXT: shldq $63, %rax, %rcx
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX1-NEXT: shldq $63, %rax, %rdi
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
; AVX1-NEXT: movq (%rsp), %rax # 8-byte Reload
; AVX1-NEXT: shldq $63, %rax, %r12
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r10 # 8-byte Reload
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX1-NEXT: shldq $63, %rax, %r10
; AVX1-NEXT: shldq $63, %rcx, %rax
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX1-NEXT: shldq $63, %rcx, %r9
; AVX1-NEXT: movq (%rsp), %rcx # 8-byte Reload
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX1-NEXT: shldq $63, %rdx, %rax
; AVX1-NEXT: vmovq %rax, %xmm8
; AVX1-NEXT: vmovq %r10, %xmm0
; AVX1-NEXT: vmovq %r12, %xmm1
; AVX1-NEXT: vmovq %rdi, %xmm11
; AVX1-NEXT: vmovq %rcx, %xmm2
; AVX1-NEXT: shldq $63, %rdx, %rcx
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX1-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
; AVX1-NEXT: shldq $63, %rbp, %rdx
; AVX1-NEXT: vmovq %rdx, %xmm8
; AVX1-NEXT: vmovq %rcx, %xmm0
; AVX1-NEXT: vmovq %r9, %xmm1
; AVX1-NEXT: vmovq %rax, %xmm11
; AVX1-NEXT: vmovq %r15, %xmm2
; AVX1-NEXT: vmovq %rsi, %xmm13
; AVX1-NEXT: vmovq %rbp, %xmm14
; AVX1-NEXT: vmovq %r15, %xmm15
; AVX1-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm9 # 8-byte Folded Reload
; AVX1-NEXT: # xmm9 = mem[0],zero
; AVX1-NEXT: vmovq %r13, %xmm10
; AVX1-NEXT: vmovq %r14, %xmm12
; AVX1-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 8-byte Folded Reload
; AVX1-NEXT: # xmm14 = mem[0],zero
; AVX1-NEXT: vmovq %r12, %xmm15
; AVX1-NEXT: vmovq %r13, %xmm9
; AVX1-NEXT: vmovq %r14, %xmm10
; AVX1-NEXT: vmovq %rbx, %xmm12
; AVX1-NEXT: vmovq %r11, %xmm3
; AVX1-NEXT: vmovq %rbx, %xmm4
; AVX1-NEXT: vmovq %r9, %xmm5
; AVX1-NEXT: vmovq %r8, %xmm6
; AVX1-NEXT: vmovq %r10, %xmm4
; AVX1-NEXT: vmovq %r8, %xmm5
; AVX1-NEXT: vmovq %rdi, %xmm6
; AVX1-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 8-byte Folded Reload
; AVX1-NEXT: # xmm7 = mem[0],zero
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm0[0],xmm8[0]
@ -2382,7 +2379,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX1-NEXT: vpshufb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT: vmovdqu %xmm0, (%rax)
; AVX1-NEXT: addq $24, %rsp
; AVX1-NEXT: addq $16, %rsp
; AVX1-NEXT: popq %rbx
; AVX1-NEXT: popq %r12
; AVX1-NEXT: popq %r13
@ -2407,15 +2404,15 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpextrq $1, %xmm4, %rbx
; AVX2-NEXT: vmovq %xmm4, %rbp
; AVX2-NEXT: vmovq %xmm4, %rdx
; AVX2-NEXT: vpextrq $1, %xmm3, %rdi
; AVX2-NEXT: vmovq %xmm3, %rcx
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpextrq $1, %xmm3, %rdx
; AVX2-NEXT: vmovq %xmm3, %r9
; AVX2-NEXT: vpextrq $1, %xmm2, %r11
; AVX2-NEXT: vpextrq $1, %xmm3, %r9
; AVX2-NEXT: vmovq %xmm3, %r10
; AVX2-NEXT: vpextrq $1, %xmm2, %r13
; AVX2-NEXT: vmovq %xmm2, %r12
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
@ -2433,26 +2430,26 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX2-NEXT: vpextrq $1, %xmm4, %rax
; AVX2-NEXT: addq %rbx, %rax
; AVX2-NEXT: movq %rax, %rbx
; AVX2-NEXT: vmovq %xmm4, %r13
; AVX2-NEXT: addq %rbp, %r13
; AVX2-NEXT: vpextrq $1, %xmm3, %r10
; AVX2-NEXT: addq %rdi, %r10
; AVX2-NEXT: vpextrq $1, %xmm4, %rbp
; AVX2-NEXT: addq %rbx, %rbp
; AVX2-NEXT: vmovq %xmm4, %rax
; AVX2-NEXT: addq %rdx, %rax
; AVX2-NEXT: movq %rax, %r11
; AVX2-NEXT: vpextrq $1, %xmm3, %r8
; AVX2-NEXT: addq %rdi, %r8
; AVX2-NEXT: vmovq %xmm3, %r14
; AVX2-NEXT: addq %rcx, %r14
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm2
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpextrq $1, %xmm3, %rax
; AVX2-NEXT: addq %rdx, %rax
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: vmovq %xmm3, %r8
; AVX2-NEXT: addq %r9, %r8
; AVX2-NEXT: vpextrq $1, %xmm2, %rax
; AVX2-NEXT: addq %r11, %rax
; AVX2-NEXT: movq %rax, %r11
; AVX2-NEXT: addq %r9, %rax
; AVX2-NEXT: movq %rax, %rbx
; AVX2-NEXT: vmovq %xmm3, %rax
; AVX2-NEXT: addq %r10, %rax
; AVX2-NEXT: movq %rax, %r10
; AVX2-NEXT: vpextrq $1, %xmm2, %rcx
; AVX2-NEXT: addq %r13, %rcx
; AVX2-NEXT: vmovq %xmm2, %rax
; AVX2-NEXT: addq %r12, %rax
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@ -2474,8 +2471,8 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX2-NEXT: vpextrq $1, %xmm2, %rbp
; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; AVX2-NEXT: vpextrq $1, %xmm2, %r12
; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
; AVX2-NEXT: vmovq %xmm2, %r9
; AVX2-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Folded Reload
; AVX2-NEXT: vpextrq $1, %xmm1, %rax
@ -2484,36 +2481,36 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX2-NEXT: vmovq %xmm1, %rdx
; AVX2-NEXT: vmovq %xmm0, %rsi
; AVX2-NEXT: addq %rdx, %rsi
; AVX2-NEXT: addq $-1, %rbx
; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: addq $-1, %rbp
; AVX2-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %eax
; AVX2-NEXT: adcq $-1, %rax
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: addq $-1, %r13
; AVX2-NEXT: movq %r13, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: addq $-1, %r11
; AVX2-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %eax
; AVX2-NEXT: adcq $-1, %rax
; AVX2-NEXT: movq %rax, (%rsp) # 8-byte Spill
; AVX2-NEXT: addq $-1, %r10
; AVX2-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: addq $-1, %r8
; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %eax
; AVX2-NEXT: adcq $-1, %rax
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: addq $-1, %r14
; AVX2-NEXT: movq %r14, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %r13d
; AVX2-NEXT: adcq $-1, %r13
; AVX2-NEXT: addq $-1, %rcx
; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %ebp
; AVX2-NEXT: adcq $-1, %rbp
; AVX2-NEXT: addq $-1, %rbx
; AVX2-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %eax
; AVX2-NEXT: adcq $-1, %rax
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: addq $-1, %r8
; AVX2-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: addq $-1, %r10
; AVX2-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %r15d
; AVX2-NEXT: adcq $-1, %r15
; AVX2-NEXT: addq $-1, %r11
; AVX2-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: addq $-1, %rcx
; AVX2-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: movl $0, %ebx
; AVX2-NEXT: adcq $-1, %rbx
; AVX2-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
@ -2528,13 +2525,13 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX2-NEXT: adcq $-1, %rax
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX2-NEXT: movl $0, %r12d
; AVX2-NEXT: adcq $-1, %r12
; AVX2-NEXT: movl $0, %r13d
; AVX2-NEXT: adcq $-1, %r13
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX2-NEXT: addq $-1, %rcx
; AVX2-NEXT: movl $0, %r11d
; AVX2-NEXT: adcq $-1, %r11
; AVX2-NEXT: addq $-1, %rbp
; AVX2-NEXT: addq $-1, %r12
; AVX2-NEXT: movl $0, %r14d
; AVX2-NEXT: adcq $-1, %r14
; AVX2-NEXT: addq $-1, %r9
@ -2550,10 +2547,10 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX2-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX2-NEXT: shldq $63, %rdi, %rdx
; AVX2-NEXT: shldq $63, %r9, %r10
; AVX2-NEXT: shldq $63, %rbp, %r14
; AVX2-NEXT: shldq $63, %r12, %r14
; AVX2-NEXT: shldq $63, %rcx, %r11
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX2-NEXT: shldq $63, %rcx, %r12
; AVX2-NEXT: shldq $63, %rcx, %r13
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX2-NEXT: shldq $63, %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
@ -2569,10 +2566,10 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX2-NEXT: shldq $63, %rcx, %rax
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX2-NEXT: shldq $63, %rcx, %r13
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX2-NEXT: shldq $63, %rcx, %rbp
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Reload
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX2-NEXT: shldq $63, %rcx, %r12
; AVX2-NEXT: movq (%rsp), %rdi # 8-byte Reload
; AVX2-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX2-NEXT: shldq $63, %rcx, %rdi
@ -2581,8 +2578,8 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX2-NEXT: shldq $63, %rcx, %rsi
; AVX2-NEXT: vmovq %rsi, %xmm8
; AVX2-NEXT: vmovq %rdi, %xmm9
; AVX2-NEXT: vmovq %rbp, %xmm10
; AVX2-NEXT: vmovq %r13, %xmm11
; AVX2-NEXT: vmovq %r12, %xmm10
; AVX2-NEXT: vmovq %rbp, %xmm11
; AVX2-NEXT: vmovq %rax, %xmm12
; AVX2-NEXT: vmovq %r15, %xmm13
; AVX2-NEXT: vmovq %rbx, %xmm14
@ -2590,7 +2587,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX2-NEXT: vmovq %r9, %xmm0
; AVX2-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Folded Reload
; AVX2-NEXT: # xmm1 = mem[0],zero
; AVX2-NEXT: vmovq %r12, %xmm2
; AVX2-NEXT: vmovq %r13, %xmm2
; AVX2-NEXT: vmovq %r11, %xmm3
; AVX2-NEXT: vmovq %r14, %xmm4
; AVX2-NEXT: vmovq %r10, %xmm5
@ -2647,7 +2644,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX512-NEXT: pushq %r13
; AVX512-NEXT: pushq %r12
; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: subq $24, %rsp
; AVX512-NEXT: subq $16, %rsp
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
@ -2660,8 +2657,8 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX512-NEXT: vmovq %xmm3, %rsi
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX512-NEXT: vpextrq $1, %xmm3, %rdx
; AVX512-NEXT: vmovq %xmm3, %r8
; AVX512-NEXT: vpextrq $1, %xmm3, %rcx
; AVX512-NEXT: vmovq %xmm3, %r10
; AVX512-NEXT: vpextrq $1, %xmm2, %r13
; AVX512-NEXT: vmovq %xmm2, %r12
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0
@ -2669,7 +2666,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX512-NEXT: vpextrq $1, %xmm3, %r15
; AVX512-NEXT: vpextrq $1, %xmm3, %rdx
; AVX512-NEXT: vmovq %xmm3, %r14
; AVX512-NEXT: vpextrq $1, %xmm2, %r9
; AVX512-NEXT: vmovq %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
@ -2681,35 +2678,34 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; AVX512-NEXT: vextracti128 $1, %ymm3, %xmm4
; AVX512-NEXT: vpextrq $1, %xmm4, %rax
; AVX512-NEXT: addq %rbx, %rax
; AVX512-NEXT: movq %rax, %rbx
; AVX512-NEXT: vpextrq $1, %xmm4, %r11
; AVX512-NEXT: addq %rbx, %r11
; AVX512-NEXT: vmovq %xmm4, %rax
; AVX512-NEXT: addq %rbp, %rax
; AVX512-NEXT: movq %rax, %rbp
; AVX512-NEXT: vpextrq $1, %xmm3, %rax
; AVX512-NEXT: addq %rdi, %rax
; AVX512-NEXT: movq %rax, %rdi
; AVX512-NEXT: vmovq %xmm3, %r10
; AVX512-NEXT: addq %rsi, %r10
; AVX512-NEXT: movq %rax, %rbx
; AVX512-NEXT: vpextrq $1, %xmm3, %r8
; AVX512-NEXT: addq %rdi, %r8
; AVX512-NEXT: vmovq %xmm3, %r15
; AVX512-NEXT: addq %rsi, %r15
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX512-NEXT: vpextrq $1, %xmm3, %rcx
; AVX512-NEXT: addq %rdx, %rcx
; AVX512-NEXT: vpextrq $1, %xmm3, %rdi
; AVX512-NEXT: addq %rcx, %rdi
; AVX512-NEXT: vmovq %xmm3, %rax
; AVX512-NEXT: addq %r8, %rax
; AVX512-NEXT: movq %rax, %r8
; AVX512-NEXT: addq %r10, %rax
; AVX512-NEXT: movq %rax, %r10
; AVX512-NEXT: vpextrq $1, %xmm2, %rsi
; AVX512-NEXT: addq %r13, %rsi
; AVX512-NEXT: vmovq %xmm2, %r11
; AVX512-NEXT: addq %r12, %r11
; AVX512-NEXT: vmovq %xmm2, %rax
; AVX512-NEXT: addq %r12, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm1
; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX512-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX512-NEXT: vpextrq $1, %xmm3, %rax
; AVX512-NEXT: addq %r15, %rax
; AVX512-NEXT: addq %rdx, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: vmovq %xmm3, %rax
; AVX512-NEXT: addq %r14, %rax
@ -2722,24 +2718,33 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
; AVX512-NEXT: vextracti128 $1, %ymm1, %xmm2
; AVX512-NEXT: vpextrq $1, %xmm2, %rax
; AVX512-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: vpextrq $1, %xmm2, %rbp
; AVX512-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
; AVX512-NEXT: vmovq %xmm2, %r14
; AVX512-NEXT: addq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Folded Reload
; AVX512-NEXT: vpextrq $1, %xmm0, %rax
; AVX512-NEXT: vpextrq $1, %xmm1, %r9
; AVX512-NEXT: addq %rax, %r9
; AVX512-NEXT: vmovq %xmm0, %rax
; AVX512-NEXT: vmovq %xmm0, %rcx
; AVX512-NEXT: vmovq %xmm1, %rdx
; AVX512-NEXT: addq %rax, %rdx
; AVX512-NEXT: addq %rcx, %rdx
; AVX512-NEXT: addq $-1, %r11
; AVX512-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: addq $-1, %rbx
; AVX512-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, (%rsp) # 8-byte Spill
; AVX512-NEXT: addq $-1, %r8
; AVX512-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: addq $-1, %rbp
; AVX512-NEXT: movq %rbp, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: addq $-1, %r15
; AVX512-NEXT: movq %r15, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@ -2747,108 +2752,94 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX512-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, (%rsp) # 8-byte Spill
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: addq $-1, %r10
; AVX512-NEXT: movq %r10, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: addq $-1, %rcx
; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: addq $-1, %r8
; AVX512-NEXT: movq %r8, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movq %rax, %rcx
; AVX512-NEXT: addq $-1, %rsi
; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %r13d
; AVX512-NEXT: adcq $-1, %r13
; AVX512-NEXT: addq $-1, %r11
; AVX512-NEXT: movq %r11, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movl $0, %r15d
; AVX512-NEXT: adcq $-1, %r15
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: movq %rax, %rsi
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX512-NEXT: movl $0, %r12d
; AVX512-NEXT: adcq $-1, %r12
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX512-NEXT: movl $0, %ebx
; AVX512-NEXT: adcq $-1, %rbx
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
; AVX512-NEXT: addq $-1, %rbp
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX512-NEXT: movl $0, %r13d
; AVX512-NEXT: adcq $-1, %r13
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX512-NEXT: movl $0, %r15d
; AVX512-NEXT: adcq $-1, %r15
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX512-NEXT: movl $0, %r11d
; AVX512-NEXT: adcq $-1, %r11
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512-NEXT: addq $-1, %rax
; AVX512-NEXT: addq $-1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; AVX512-NEXT: movl $0, %r8d
; AVX512-NEXT: adcq $-1, %r8
; AVX512-NEXT: addq $-1, %rbp
; AVX512-NEXT: movl $0, %r10d
; AVX512-NEXT: adcq $-1, %r10
; AVX512-NEXT: addq $-1, %r14
; AVX512-NEXT: movl $0, %r8d
; AVX512-NEXT: adcq $-1, %r8
; AVX512-NEXT: addq $-1, %r9
; AVX512-NEXT: movl $0, %edi
; AVX512-NEXT: adcq $-1, %rdi
; AVX512-NEXT: addq $-1, %r9
; AVX512-NEXT: movl $0, %esi
; AVX512-NEXT: adcq $-1, %rsi
; AVX512-NEXT: addq $-1, %rdx
; AVX512-NEXT: movl $0, %ecx
; AVX512-NEXT: adcq $-1, %rcx
; AVX512-NEXT: shldq $63, %rdx, %rcx
; AVX512-NEXT: movl $0, %eax
; AVX512-NEXT: adcq $-1, %rax
; AVX512-NEXT: shldq $63, %rdx, %rax
; AVX512-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: shldq $63, %r9, %rsi
; AVX512-NEXT: shldq $63, %r14, %rdi
; AVX512-NEXT: shldq $63, %rbp, %r10
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX512-NEXT: shldq $63, %rdx, %r8
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX512-NEXT: shldq $63, %rdx, %r11
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX512-NEXT: shldq $63, %rdx, %r15
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX512-NEXT: shldq $63, %rdx, %r13
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512-NEXT: shldq $63, %rax, %rbx
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512-NEXT: shldq $63, %rax, %r12
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512-NEXT: shldq $63, %rax, %rcx
; AVX512-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: shldq $63, %r9, %rdi
; AVX512-NEXT: shldq $63, %r14, %r8
; AVX512-NEXT: shldq $63, %rax, %r10
; AVX512-NEXT: shldq $63, %rbp, %r11
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX512-NEXT: shldq $63, %rdx, %rbx
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX512-NEXT: shldq $63, %rdx, %r12
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX512-NEXT: shldq $63, %rdx, %rsi
; AVX512-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512-NEXT: shldq $63, %rax, %r15
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512-NEXT: shldq $63, %rax, %r13
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512-NEXT: shldq $63, %rax, %rsi
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512-NEXT: shldq $63, %rax, %rcx
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX512-NEXT: shldq $63, %rdx, %rax
; AVX512-NEXT: movq (%rsp), %r14 # 8-byte Reload
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r14 # 8-byte Reload
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX512-NEXT: shldq $63, %rdx, %r14
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload
; AVX512-NEXT: movq (%rsp), %r9 # 8-byte Reload
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX512-NEXT: shldq $63, %rdx, %r9
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
; AVX512-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Reload
; AVX512-NEXT: shldq $63, %rdx, %rbp
; AVX512-NEXT: vmovq %rbp, %xmm8
; AVX512-NEXT: shldq $63, %rbp, %rdx
; AVX512-NEXT: vmovq %rdx, %xmm8
; AVX512-NEXT: vmovq %r9, %xmm9
; AVX512-NEXT: vmovq %r14, %xmm10
; AVX512-NEXT: vmovq %rax, %xmm11
; AVX512-NEXT: vmovq %rcx, %xmm12
; AVX512-NEXT: vmovq %rsi, %xmm13
; AVX512-NEXT: vmovq %r13, %xmm14
; AVX512-NEXT: vmovq %r15, %xmm15
; AVX512-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; AVX512-NEXT: # xmm0 = mem[0],zero
; AVX512-NEXT: vmovq %r12, %xmm1
; AVX512-NEXT: vmovq %rbx, %xmm2
; AVX512-NEXT: vmovq %r11, %xmm3
; AVX512-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 8-byte Folded Reload
; AVX512-NEXT: # xmm13 = mem[0],zero
; AVX512-NEXT: vmovq %r12, %xmm14
; AVX512-NEXT: vmovq %rbx, %xmm15
; AVX512-NEXT: vmovq %r13, %xmm0
; AVX512-NEXT: vmovq %r15, %xmm1
; AVX512-NEXT: vmovq %r11, %xmm2
; AVX512-NEXT: vmovq %r8, %xmm3
; AVX512-NEXT: vmovq %r10, %xmm4
; AVX512-NEXT: vmovq %r8, %xmm5
; AVX512-NEXT: vmovq %rdi, %xmm6
; AVX512-NEXT: vmovq %rdi, %xmm5
; AVX512-NEXT: vmovq %rsi, %xmm6
; AVX512-NEXT: vmovq {{[-0-9]+}}(%r{{[sb]}}p), %xmm7 # 8-byte Folded Reload
; AVX512-NEXT: # xmm7 = mem[0],zero
; AVX512-NEXT: vpunpcklqdq {{.*#+}} xmm8 = xmm9[0],xmm8[0]
@ -2869,7 +2860,7 @@ define void @not_avg_v16i8_wide_constants(<16 x i8>* %a, <16 x i8>* %b) nounwind
; AVX512-NEXT: vpmovqd %zmm0, %ymm0
; AVX512-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512-NEXT: vpmovdb %zmm0, (%rax)
; AVX512-NEXT: addq $24, %rsp
; AVX512-NEXT: addq $16, %rsp
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %r12
; AVX512-NEXT: popq %r13

View File

@ -77,11 +77,11 @@ define i64 @addressModeWith32bitIndex(i32 %V) {
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill
; CHECK-NEXT: cqto
; CHECK-NEXT: movslq %edi, %rsi
; CHECK-NEXT: idivq (%rcx,%rsi,8)
; CHECK-NEXT: movslq %edi, %rcx
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload
; CHECK-NEXT: idivq (%rsi,%rcx,8)
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
%gep = getelementptr i64, i64* null, i32 %V

View File

@ -2,9 +2,7 @@
; Check no spills to the same stack slot after hoisting.
; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET1:-?[0-9]*]](%rsp)
; CHECK: mov{{.}} %{{.*}}, [[SPOFFSET2:-?[0-9]*]](%rsp)
; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET1]](%rsp)
; CHECK-NOT: mov{{.}} %{{.*}}, [[SPOFFSET2]](%rsp)
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

View File

@ -133,25 +133,26 @@ return:
define i8* @bsd_memchr(i8* %s, i32 %a, i32 %c, i64 %n) nounwind ssp {
; CHECK-LABEL: bsd_memchr:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: testq %rcx, %rcx
; CHECK-NEXT: je .LBB3_4
; CHECK-NEXT: je .LBB3_5
; CHECK-NEXT: # %bb.1: # %preheader
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: movzbl %dl, %edx
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB3_2: # %do.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: cmpl %edx, %esi
; CHECK-NEXT: je .LBB3_5
; CHECK-NEXT: # %bb.3: # %do.cond
; CHECK-NEXT: je .LBB3_3
; CHECK-NEXT: # %bb.4: # %do.cond
; CHECK-NEXT: # in Loop: Header=BB3_2 Depth=1
; CHECK-NEXT: incq %rax
; CHECK-NEXT: incq %rdi
; CHECK-NEXT: decq %rcx
; CHECK-NEXT: jne .LBB3_2
; CHECK-NEXT: .LBB3_4:
; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: .LBB3_5: # %return
; CHECK-NEXT: retq
; CHECK-NEXT: .LBB3_3:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq
entry:
%cmp = icmp eq i64 %n, 0
br i1 %cmp, label %return, label %preheader

View File

@ -356,7 +356,7 @@ define i32 @_Z10test_shortPsS_i_1024(i16* nocapture readonly, i16* nocapture rea
; SSE2-NEXT: xorl %ecx, %ecx
; SSE2-NEXT: pxor %xmm2, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: .p2align 4, 0x90
; SSE2-NEXT: .LBB3_1: # %vector.body
@ -365,18 +365,18 @@ define i32 @_Z10test_shortPsS_i_1024(i16* nocapture readonly, i16* nocapture rea
; SSE2-NEXT: movdqu 16(%rdi,%rcx,2), %xmm6
; SSE2-NEXT: movdqu 32(%rdi,%rcx,2), %xmm7
; SSE2-NEXT: movdqu 48(%rdi,%rcx,2), %xmm9
; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm0
; SSE2-NEXT: pmaddwd %xmm5, %xmm0
; SSE2-NEXT: paddd %xmm0, %xmm2
; SSE2-NEXT: movdqu 16(%rsi,%rcx,2), %xmm0
; SSE2-NEXT: pmaddwd %xmm6, %xmm0
; SSE2-NEXT: paddd %xmm0, %xmm4
; SSE2-NEXT: movdqu 32(%rsi,%rcx,2), %xmm0
; SSE2-NEXT: pmaddwd %xmm7, %xmm0
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: movdqu 48(%rsi,%rcx,2), %xmm0
; SSE2-NEXT: pmaddwd %xmm9, %xmm0
; SSE2-NEXT: paddd %xmm0, %xmm3
; SSE2-NEXT: movdqu (%rsi,%rcx,2), %xmm1
; SSE2-NEXT: pmaddwd %xmm5, %xmm1
; SSE2-NEXT: paddd %xmm1, %xmm2
; SSE2-NEXT: movdqu 16(%rsi,%rcx,2), %xmm1
; SSE2-NEXT: pmaddwd %xmm6, %xmm1
; SSE2-NEXT: paddd %xmm1, %xmm4
; SSE2-NEXT: movdqu 32(%rsi,%rcx,2), %xmm1
; SSE2-NEXT: pmaddwd %xmm7, %xmm1
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: movdqu 48(%rsi,%rcx,2), %xmm1
; SSE2-NEXT: pmaddwd %xmm9, %xmm1
; SSE2-NEXT: paddd %xmm1, %xmm3
; SSE2-NEXT: addq $16, %rcx
; SSE2-NEXT: cmpq %rcx, %rax
; SSE2-NEXT: jne .LBB3_1
@ -385,14 +385,14 @@ define i32 @_Z10test_shortPsS_i_1024(i16* nocapture readonly, i16* nocapture rea
; SSE2-NEXT: paddd %xmm8, %xmm3
; SSE2-NEXT: paddd %xmm4, %xmm3
; SSE2-NEXT: paddd %xmm8, %xmm2
; SSE2-NEXT: paddd %xmm8, %xmm1
; SSE2-NEXT: paddd %xmm3, %xmm1
; SSE2-NEXT: paddd %xmm2, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT: paddd %xmm8, %xmm0
; SSE2-NEXT: paddd %xmm3, %xmm0
; SSE2-NEXT: paddd %xmm2, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: _Z10test_shortPsS_i_1024:
@ -949,7 +949,7 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
; SSE2-NEXT: xorl %ecx, %ecx
; SSE2-NEXT: pxor %xmm9, %xmm9
; SSE2-NEXT: pxor %xmm4, %xmm4
; SSE2-NEXT: pxor %xmm1, %xmm1
; SSE2-NEXT: pxor %xmm0, %xmm0
; SSE2-NEXT: pxor %xmm3, %xmm3
; SSE2-NEXT: .p2align 4, 0x90
; SSE2-NEXT: .LBB7_1: # %vector.body
@ -963,9 +963,9 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
; SSE2-NEXT: movq {{.*#+}} xmm7 = mem[0],zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm7
; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm0
; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm1
; SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm2
@ -980,11 +980,11 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm2
; SSE2-NEXT: pmaddwd %xmm7, %xmm2
; SSE2-NEXT: paddd %xmm2, %xmm1
; SSE2-NEXT: paddd %xmm2, %xmm0
; SSE2-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm2
; SSE2-NEXT: pmaddwd %xmm0, %xmm2
; SSE2-NEXT: pmaddwd %xmm1, %xmm2
; SSE2-NEXT: paddd %xmm2, %xmm3
; SSE2-NEXT: addq $32, %rcx
; SSE2-NEXT: cmpq %rcx, %rax
@ -994,14 +994,14 @@ define i32 @_Z9test_charPcS_i_1024(i8* nocapture readonly, i8* nocapture readonl
; SSE2-NEXT: paddd %xmm8, %xmm3
; SSE2-NEXT: paddd %xmm4, %xmm3
; SSE2-NEXT: paddd %xmm8, %xmm9
; SSE2-NEXT: paddd %xmm8, %xmm1
; SSE2-NEXT: paddd %xmm3, %xmm1
; SSE2-NEXT: paddd %xmm9, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT: paddd %xmm8, %xmm0
; SSE2-NEXT: paddd %xmm3, %xmm0
; SSE2-NEXT: paddd %xmm9, %xmm0
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,2,3]
; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: movd %xmm0, %eax
; SSE2-NEXT: retq
;
; AVX1-LABEL: _Z9test_charPcS_i_1024:

View File

@ -604,12 +604,13 @@ define <1 x i64> @test3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
;
; X64-LABEL: test3:
; X64: # %bb.0: # %entry
; X64-NEXT: xorl %r8d, %r8d
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testl %edx, %edx
; X64-NEXT: je .LBB3_2
; X64-NEXT: je .LBB3_3
; X64-NEXT: # %bb.1: # %bb26.preheader
; X64-NEXT: xorl %r8d, %r8d
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB3_1: # %bb26
; X64-NEXT: .LBB3_2: # %bb26
; X64-NEXT: # =>This Inner Loop Header: Depth=1
; X64-NEXT: movslq %r8d, %r8
; X64-NEXT: movq (%rdi,%r8,8), %rcx
@ -617,8 +618,8 @@ define <1 x i64> @test3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
; X64-NEXT: addq %rcx, %rax
; X64-NEXT: incl %r8d
; X64-NEXT: cmpl %edx, %r8d
; X64-NEXT: jb .LBB3_1
; X64-NEXT: .LBB3_2: # %bb31
; X64-NEXT: jb .LBB3_2
; X64-NEXT: .LBB3_3: # %bb31
; X64-NEXT: retq
entry:
%tmp2942 = icmp eq i32 %count, 0

View File

@ -10,13 +10,12 @@ define void @foo() {
; X86-O0-LABEL: foo:
; X86-O0: # %bb.0: # %entry
; X86-O0-NEXT: xorl %eax, %eax
; X86-O0-NEXT: movl %eax, %ecx
; X86-O0-NEXT: xorl %eax, %eax
; X86-O0-NEXT: xorl %ecx, %ecx
; X86-O0-NEXT: movzbl c, %edx
; X86-O0-NEXT: subl %edx, %eax
; X86-O0-NEXT: movslq %eax, %rsi
; X86-O0-NEXT: subq %rsi, %rcx
; X86-O0-NEXT: movb %cl, %dil
; X86-O0-NEXT: subl %edx, %ecx
; X86-O0-NEXT: movslq %ecx, %rsi
; X86-O0-NEXT: subq %rsi, %rax
; X86-O0-NEXT: movb %al, %dil
; X86-O0-NEXT: cmpb $0, %dil
; X86-O0-NEXT: setne %dil
; X86-O0-NEXT: andb $1, %dil
@ -26,13 +25,13 @@ define void @foo() {
; X86-O0-NEXT: xorb $-1, %dil
; X86-O0-NEXT: xorb $-1, %dil
; X86-O0-NEXT: andb $1, %dil
; X86-O0-NEXT: movzbl %dil, %eax
; X86-O0-NEXT: movzbl %dil, %ecx
; X86-O0-NEXT: movzbl c, %edx
; X86-O0-NEXT: cmpl %edx, %eax
; X86-O0-NEXT: cmpl %edx, %ecx
; X86-O0-NEXT: setle %dil
; X86-O0-NEXT: andb $1, %dil
; X86-O0-NEXT: movzbl %dil, %eax
; X86-O0-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X86-O0-NEXT: movzbl %dil, %ecx
; X86-O0-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X86-O0-NEXT: retq
;
; X64-LABEL: foo:

View File

@ -14,22 +14,21 @@ define void @foo() {
; X64-LABEL: foo:
; X64: # %bb.0: # %entry
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: movl %eax, %ecx
; X64-NEXT: movw $0, var_825
; X64-NEXT: movzwl var_32, %eax
; X64-NEXT: movzwl var_32, %ecx
; X64-NEXT: movzwl var_901, %edx
; X64-NEXT: movl %eax, %esi
; X64-NEXT: movl %ecx, %esi
; X64-NEXT: xorl %edx, %esi
; X64-NEXT: movl %eax, %edx
; X64-NEXT: movl %ecx, %edx
; X64-NEXT: xorl %esi, %edx
; X64-NEXT: addl %eax, %edx
; X64-NEXT: addl %ecx, %edx
; X64-NEXT: movslq %edx, %rdi
; X64-NEXT: movq %rdi, var_826
; X64-NEXT: movzwl var_32, %eax
; X64-NEXT: movl %eax, %edi
; X64-NEXT: movzwl var_901, %eax
; X64-NEXT: xorl $51981, %eax # imm = 0xCB0D
; X64-NEXT: movslq %eax, %r8
; X64-NEXT: movzwl var_32, %ecx
; X64-NEXT: movl %ecx, %edi
; X64-NEXT: movzwl var_901, %ecx
; X64-NEXT: xorl $51981, %ecx # imm = 0xCB0D
; X64-NEXT: movslq %ecx, %r8
; X64-NEXT: movabsq $-1142377792914660288, %r9 # imm = 0xF02575732E06E440
; X64-NEXT: xorq %r9, %r8
; X64-NEXT: movq %rdi, %r9
@ -41,11 +40,11 @@ define void @foo() {
; X64-NEXT: orq %r8, %rdi
; X64-NEXT: movw %di, %r10w
; X64-NEXT: movw %r10w, var_900
; X64-NEXT: cmpq var_28, %rcx
; X64-NEXT: cmpq var_28, %rax
; X64-NEXT: setne %r11b
; X64-NEXT: andb $1, %r11b
; X64-NEXT: movzbl %r11b, %eax
; X64-NEXT: movw %ax, %r10w
; X64-NEXT: movzbl %r11b, %ecx
; X64-NEXT: movw %cx, %r10w
; X64-NEXT: movw %r10w, var_827
; X64-NEXT: retq
entry:

View File

@ -19,18 +19,18 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: pushq %rbx
; ILP-NEXT: movq %rcx, %r9
; ILP-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; ILP-NEXT: xorl %eax, %eax
; ILP-NEXT: addq $1, %rsi
; ILP-NEXT: adcq $0, %rdx
; ILP-NEXT: adcq $0, %r9
; ILP-NEXT: adcq $0, %r8
; ILP-NEXT: leal 1(%rsi,%rsi), %edi
; ILP-NEXT: movl $1, %ebp
; ILP-NEXT: xorl %r14d, %r14d
; ILP-NEXT: xorl %eax, %eax
; ILP-NEXT: xorl %r11d, %r11d
; ILP-NEXT: movl %edi, %ecx
; ILP-NEXT: shldq %cl, %rbp, %r14
; ILP-NEXT: movl $1, %r11d
; ILP-NEXT: shlq %cl, %r11
; ILP-NEXT: shldq %cl, %rbp, %r11
; ILP-NEXT: movl $1, %r14d
; ILP-NEXT: shlq %cl, %r14
; ILP-NEXT: movb $-128, %r10b
; ILP-NEXT: subb %dil, %r10b
; ILP-NEXT: movq %r9, %r13
@ -41,33 +41,33 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: xorl %r15d, %r15d
; ILP-NEXT: movl %edi, %ecx
; ILP-NEXT: shldq %cl, %r15, %r15
; ILP-NEXT: movq %rsi, %rbx
; ILP-NEXT: shrdq %cl, %rdx, %rbx
; ILP-NEXT: movq %rsi, %rbp
; ILP-NEXT: shrdq %cl, %rdx, %rbp
; ILP-NEXT: shrq %cl, %rdx
; ILP-NEXT: addb $-128, %cl
; ILP-NEXT: shrdq %cl, %r8, %r9
; ILP-NEXT: testb $64, %dil
; ILP-NEXT: cmovneq %r11, %r14
; ILP-NEXT: cmoveq %rbx, %rdx
; ILP-NEXT: cmovneq %r14, %r11
; ILP-NEXT: cmoveq %rbp, %rdx
; ILP-NEXT: cmovneq %rax, %r15
; ILP-NEXT: cmovneq %rax, %r11
; ILP-NEXT: cmovneq %rax, %r14
; ILP-NEXT: testb $64, %r10b
; ILP-NEXT: cmovneq %rax, %r12
; ILP-NEXT: cmovneq %rax, %r13
; ILP-NEXT: movl $1, %ebx
; ILP-NEXT: shlq %cl, %rbx
; ILP-NEXT: movl $1, %ebp
; ILP-NEXT: shlq %cl, %rbp
; ILP-NEXT: orl %edx, %r13d
; ILP-NEXT: xorl %edx, %edx
; ILP-NEXT: movl $1, %ebp
; ILP-NEXT: shldq %cl, %rbp, %rdx
; ILP-NEXT: movl $1, %ebx
; ILP-NEXT: shldq %cl, %rbx, %rdx
; ILP-NEXT: shrq %cl, %r8
; ILP-NEXT: testb $64, %cl
; ILP-NEXT: cmoveq %r9, %r8
; ILP-NEXT: cmovneq %rbx, %rdx
; ILP-NEXT: cmovneq %rax, %rbx
; ILP-NEXT: cmovneq %rbp, %rdx
; ILP-NEXT: cmovneq %rax, %rbp
; ILP-NEXT: testb %dil, %dil
; ILP-NEXT: cmovsq %rax, %r14
; ILP-NEXT: cmovsq %rax, %r11
; ILP-NEXT: cmovsq %rax, %r14
; ILP-NEXT: jns .LBB0_2
; ILP-NEXT: # %bb.1:
; ILP-NEXT: movl %r8d, %r13d
@@ -76,20 +76,20 @@ define i256 @test1(i256 %a) nounwind {
; ILP-NEXT: # %bb.3:
; ILP-NEXT: movl %r13d, %esi
; ILP-NEXT: .LBB0_4:
; ILP-NEXT: cmovnsq %r12, %rbx
; ILP-NEXT: cmoveq %rax, %rbx
; ILP-NEXT: cmovnsq %r12, %rbp
; ILP-NEXT: cmoveq %rax, %rbp
; ILP-NEXT: cmovnsq %r15, %rdx
; ILP-NEXT: cmoveq %rax, %rdx
; ILP-NEXT: testb $1, %sil
; ILP-NEXT: cmovneq %rax, %rdx
; ILP-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; ILP-NEXT: movq %rdx, 24(%rax)
; ILP-NEXT: cmovneq %rax, %rbx
; ILP-NEXT: movq %rbx, 16(%rax)
; ILP-NEXT: cmovneq %rax, %r14
; ILP-NEXT: movq %r14, 8(%rax)
; ILP-NEXT: cmovneq %rax, %rbp
; ILP-NEXT: movq %rbp, 16(%rax)
; ILP-NEXT: cmovneq %rax, %r11
; ILP-NEXT: movq %r11, (%rax)
; ILP-NEXT: movq %r11, 8(%rax)
; ILP-NEXT: cmovneq %rax, %r14
; ILP-NEXT: movq %r14, (%rax)
; ILP-NEXT: popq %rbx
; ILP-NEXT: popq %r12
; ILP-NEXT: popq %r13
@@ -100,7 +100,6 @@ define i256 @test1(i256 %a) nounwind {
;
; HYBRID-LABEL: test1:
; HYBRID: # %bb.0:
; HYBRID-NEXT: pushq %rbp
; HYBRID-NEXT: pushq %r15
; HYBRID-NEXT: pushq %r14
; HYBRID-NEXT: pushq %r13
@@ -112,84 +111,82 @@ define i256 @test1(i256 %a) nounwind {
; HYBRID-NEXT: adcq $0, %rdx
; HYBRID-NEXT: adcq $0, %r9
; HYBRID-NEXT: adcq $0, %r8
; HYBRID-NEXT: xorl %r10d, %r10d
; HYBRID-NEXT: leal 1(%rsi,%rsi), %edi
; HYBRID-NEXT: xorl %r14d, %r14d
; HYBRID-NEXT: xorl %r15d, %r15d
; HYBRID-NEXT: movl %edi, %ecx
; HYBRID-NEXT: shldq %cl, %r14, %r14
; HYBRID-NEXT: shldq %cl, %r15, %r15
; HYBRID-NEXT: testb $64, %dil
; HYBRID-NEXT: cmovneq %r10, %r14
; HYBRID-NEXT: movl $1, %ebp
; HYBRID-NEXT: cmovneq %r14, %r15
; HYBRID-NEXT: movl $1, %r11d
; HYBRID-NEXT: movl $1, %r12d
; HYBRID-NEXT: shlq %cl, %r12
; HYBRID-NEXT: testb $64, %dil
; HYBRID-NEXT: movq %r12, %r11
; HYBRID-NEXT: cmovneq %r10, %r11
; HYBRID-NEXT: movq %r12, %r10
; HYBRID-NEXT: cmovneq %r14, %r10
; HYBRID-NEXT: movq %rsi, %rbx
; HYBRID-NEXT: shrdq %cl, %rdx, %rbx
; HYBRID-NEXT: shrq %cl, %rdx
; HYBRID-NEXT: testb $64, %dil
; HYBRID-NEXT: cmoveq %rbx, %rdx
; HYBRID-NEXT: xorl %r15d, %r15d
; HYBRID-NEXT: shldq %cl, %rbp, %r15
; HYBRID-NEXT: xorl %r13d, %r13d
; HYBRID-NEXT: shldq %cl, %r11, %r13
; HYBRID-NEXT: testb $64, %dil
; HYBRID-NEXT: cmovneq %r12, %r15
; HYBRID-NEXT: cmovneq %r12, %r13
; HYBRID-NEXT: movb $-128, %cl
; HYBRID-NEXT: subb %dil, %cl
; HYBRID-NEXT: movq %r9, %r13
; HYBRID-NEXT: shlq %cl, %r13
; HYBRID-NEXT: movq %r9, %rbx
; HYBRID-NEXT: shlq %cl, %rbx
; HYBRID-NEXT: movl $1, %r12d
; HYBRID-NEXT: shrdq %cl, %r10, %r12
; HYBRID-NEXT: shrdq %cl, %r14, %r12
; HYBRID-NEXT: testb $64, %cl
; HYBRID-NEXT: cmovneq %r10, %r12
; HYBRID-NEXT: cmovneq %r10, %r13
; HYBRID-NEXT: orl %edx, %r13d
; HYBRID-NEXT: cmovneq %r14, %r12
; HYBRID-NEXT: cmovneq %r14, %rbx
; HYBRID-NEXT: orl %edx, %ebx
; HYBRID-NEXT: movl %edi, %ecx
; HYBRID-NEXT: addb $-128, %cl
; HYBRID-NEXT: shrdq %cl, %r8, %r9
; HYBRID-NEXT: shrq %cl, %r8
; HYBRID-NEXT: xorl %edx, %edx
; HYBRID-NEXT: shldq %cl, %rbp, %rdx
; HYBRID-NEXT: shlq %cl, %rbp
; HYBRID-NEXT: shldq %cl, %r11, %rdx
; HYBRID-NEXT: shlq %cl, %r11
; HYBRID-NEXT: testb $64, %cl
; HYBRID-NEXT: cmovneq %rbp, %rdx
; HYBRID-NEXT: cmovneq %r11, %rdx
; HYBRID-NEXT: cmoveq %r9, %r8
; HYBRID-NEXT: cmovneq %r10, %rbp
; HYBRID-NEXT: cmovneq %r14, %r11
; HYBRID-NEXT: testb %dil, %dil
; HYBRID-NEXT: jns .LBB0_2
; HYBRID-NEXT: # %bb.1:
; HYBRID-NEXT: movl %r8d, %r13d
; HYBRID-NEXT: movl %r8d, %ebx
; HYBRID-NEXT: .LBB0_2:
; HYBRID-NEXT: je .LBB0_4
; HYBRID-NEXT: # %bb.3:
; HYBRID-NEXT: movl %r13d, %esi
; HYBRID-NEXT: movl %ebx, %esi
; HYBRID-NEXT: .LBB0_4:
; HYBRID-NEXT: cmovsq %r10, %r15
; HYBRID-NEXT: cmovnsq %r12, %rbp
; HYBRID-NEXT: cmoveq %r10, %rbp
; HYBRID-NEXT: cmovnsq %r14, %rdx
; HYBRID-NEXT: cmoveq %r10, %rdx
; HYBRID-NEXT: cmovsq %r10, %r11
; HYBRID-NEXT: cmovsq %r14, %r13
; HYBRID-NEXT: cmovnsq %r12, %r11
; HYBRID-NEXT: cmoveq %r14, %r11
; HYBRID-NEXT: cmovnsq %r15, %rdx
; HYBRID-NEXT: cmoveq %r14, %rdx
; HYBRID-NEXT: cmovsq %r14, %r10
; HYBRID-NEXT: testb $1, %sil
; HYBRID-NEXT: cmovneq %rax, %rdx
; HYBRID-NEXT: movq %rdx, 24(%rax)
; HYBRID-NEXT: cmovneq %rax, %rbp
; HYBRID-NEXT: movq %rbp, 16(%rax)
; HYBRID-NEXT: cmovneq %rax, %r15
; HYBRID-NEXT: movq %r15, 8(%rax)
; HYBRID-NEXT: cmovneq %rax, %r11
; HYBRID-NEXT: movq %r11, (%rax)
; HYBRID-NEXT: movq %r11, 16(%rax)
; HYBRID-NEXT: cmovneq %rax, %r13
; HYBRID-NEXT: movq %r13, 8(%rax)
; HYBRID-NEXT: cmovneq %rax, %r10
; HYBRID-NEXT: movq %r10, (%rax)
; HYBRID-NEXT: popq %rbx
; HYBRID-NEXT: popq %r12
; HYBRID-NEXT: popq %r13
; HYBRID-NEXT: popq %r14
; HYBRID-NEXT: popq %r15
; HYBRID-NEXT: popq %rbp
; HYBRID-NEXT: retq
;
; BURR-LABEL: test1:
; BURR: # %bb.0:
; BURR-NEXT: pushq %rbp
; BURR-NEXT: pushq %r15
; BURR-NEXT: pushq %r14
; BURR-NEXT: pushq %r13
@@ -201,79 +198,78 @@ define i256 @test1(i256 %a) nounwind {
; BURR-NEXT: adcq $0, %rdx
; BURR-NEXT: adcq $0, %r9
; BURR-NEXT: adcq $0, %r8
; BURR-NEXT: xorl %r10d, %r10d
; BURR-NEXT: leal 1(%rsi,%rsi), %edi
; BURR-NEXT: xorl %r14d, %r14d
; BURR-NEXT: xorl %r15d, %r15d
; BURR-NEXT: movl %edi, %ecx
; BURR-NEXT: shldq %cl, %r14, %r14
; BURR-NEXT: shldq %cl, %r15, %r15
; BURR-NEXT: testb $64, %dil
; BURR-NEXT: cmovneq %r10, %r14
; BURR-NEXT: movl $1, %ebp
; BURR-NEXT: cmovneq %r14, %r15
; BURR-NEXT: movl $1, %r11d
; BURR-NEXT: movl $1, %r12d
; BURR-NEXT: shlq %cl, %r12
; BURR-NEXT: testb $64, %dil
; BURR-NEXT: movq %r12, %r11
; BURR-NEXT: cmovneq %r10, %r11
; BURR-NEXT: movq %r12, %r10
; BURR-NEXT: cmovneq %r14, %r10
; BURR-NEXT: movq %rsi, %rbx
; BURR-NEXT: shrdq %cl, %rdx, %rbx
; BURR-NEXT: shrq %cl, %rdx
; BURR-NEXT: testb $64, %dil
; BURR-NEXT: cmoveq %rbx, %rdx
; BURR-NEXT: xorl %r15d, %r15d
; BURR-NEXT: shldq %cl, %rbp, %r15
; BURR-NEXT: xorl %r13d, %r13d
; BURR-NEXT: shldq %cl, %r11, %r13
; BURR-NEXT: testb $64, %dil
; BURR-NEXT: cmovneq %r12, %r15
; BURR-NEXT: cmovneq %r12, %r13
; BURR-NEXT: movb $-128, %cl
; BURR-NEXT: subb %dil, %cl
; BURR-NEXT: movq %r9, %r13
; BURR-NEXT: shlq %cl, %r13
; BURR-NEXT: movq %r9, %rbx
; BURR-NEXT: shlq %cl, %rbx
; BURR-NEXT: movl $1, %r12d
; BURR-NEXT: shrdq %cl, %r10, %r12
; BURR-NEXT: shrdq %cl, %r14, %r12
; BURR-NEXT: testb $64, %cl
; BURR-NEXT: cmovneq %r10, %r12
; BURR-NEXT: cmovneq %r10, %r13
; BURR-NEXT: orl %edx, %r13d
; BURR-NEXT: cmovneq %r14, %r12
; BURR-NEXT: cmovneq %r14, %rbx
; BURR-NEXT: orl %edx, %ebx
; BURR-NEXT: movl %edi, %ecx
; BURR-NEXT: addb $-128, %cl
; BURR-NEXT: shrdq %cl, %r8, %r9
; BURR-NEXT: xorl %edx, %edx
; BURR-NEXT: shldq %cl, %rbp, %rdx
; BURR-NEXT: shldq %cl, %r11, %rdx
; BURR-NEXT: shrq %cl, %r8
; BURR-NEXT: shlq %cl, %rbp
; BURR-NEXT: shlq %cl, %r11
; BURR-NEXT: testb $64, %cl
; BURR-NEXT: cmovneq %rbp, %rdx
; BURR-NEXT: cmovneq %r11, %rdx
; BURR-NEXT: cmoveq %r9, %r8
; BURR-NEXT: cmovneq %r10, %rbp
; BURR-NEXT: cmovneq %r14, %r11
; BURR-NEXT: testb %dil, %dil
; BURR-NEXT: jns .LBB0_2
; BURR-NEXT: # %bb.1:
; BURR-NEXT: movl %r8d, %r13d
; BURR-NEXT: movl %r8d, %ebx
; BURR-NEXT: .LBB0_2:
; BURR-NEXT: je .LBB0_4
; BURR-NEXT: # %bb.3:
; BURR-NEXT: movl %r13d, %esi
; BURR-NEXT: movl %ebx, %esi
; BURR-NEXT: .LBB0_4:
; BURR-NEXT: cmovsq %r10, %r15
; BURR-NEXT: cmovnsq %r12, %rbp
; BURR-NEXT: cmoveq %r10, %rbp
; BURR-NEXT: cmovnsq %r14, %rdx
; BURR-NEXT: cmoveq %r10, %rdx
; BURR-NEXT: cmovsq %r10, %r11
; BURR-NEXT: cmovsq %r14, %r13
; BURR-NEXT: cmovnsq %r12, %r11
; BURR-NEXT: cmoveq %r14, %r11
; BURR-NEXT: cmovnsq %r15, %rdx
; BURR-NEXT: cmoveq %r14, %rdx
; BURR-NEXT: cmovsq %r14, %r10
; BURR-NEXT: testb $1, %sil
; BURR-NEXT: cmovneq %rax, %rdx
; BURR-NEXT: movq %rdx, 24(%rax)
; BURR-NEXT: cmovneq %rax, %rbp
; BURR-NEXT: movq %rbp, 16(%rax)
; BURR-NEXT: cmovneq %rax, %r15
; BURR-NEXT: movq %r15, 8(%rax)
; BURR-NEXT: cmovneq %rax, %r11
; BURR-NEXT: movq %r11, (%rax)
; BURR-NEXT: movq %r11, 16(%rax)
; BURR-NEXT: cmovneq %rax, %r13
; BURR-NEXT: movq %r13, 8(%rax)
; BURR-NEXT: cmovneq %rax, %r10
; BURR-NEXT: movq %r10, (%rax)
; BURR-NEXT: popq %rbx
; BURR-NEXT: popq %r12
; BURR-NEXT: popq %r13
; BURR-NEXT: popq %r14
; BURR-NEXT: popq %r15
; BURR-NEXT: popq %rbp
; BURR-NEXT: retq
;
; SRC-LABEL: test1:
@@ -300,8 +296,8 @@ define i256 @test1(i256 %a) nounwind {
; SRC-NEXT: movl %r11d, %ecx
; SRC-NEXT: shrdq %cl, %rdx, %rbp
; SRC-NEXT: shrq %cl, %rdx
; SRC-NEXT: xorl %r15d, %r15d
; SRC-NEXT: movl $1, %edi
; SRC-NEXT: xorl %r15d, %r15d
; SRC-NEXT: xorl %r14d, %r14d
; SRC-NEXT: shldq %cl, %rdi, %r14
; SRC-NEXT: xorl %r13d, %r13d
@@ -909,15 +905,15 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
; ILP-LABEL: test4:
; ILP: # %bb.0:
; ILP-NEXT: xorl %ecx, %ecx
; ILP-NEXT: xorl %edx, %edx
; ILP-NEXT: addq $1, %rsi
; ILP-NEXT: setb %dl
; ILP-NEXT: setb %cl
; ILP-NEXT: movl $2, %eax
; ILP-NEXT: xorl %edx, %edx
; ILP-NEXT: cmpq %rdi, %rsi
; ILP-NEXT: sbbq $0, %rdx
; ILP-NEXT: movl $0, %edx
; ILP-NEXT: sbbq $0, %rdx
; ILP-NEXT: sbbq $0, %rcx
; ILP-NEXT: movl $0, %ecx
; ILP-NEXT: sbbq $0, %rcx
; ILP-NEXT: sbbq $0, %rdx
; ILP-NEXT: setae %cl
; ILP-NEXT: movzbl %cl, %ecx
; ILP-NEXT: subq %rcx, %rax
@@ -926,14 +922,14 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
; HYBRID-LABEL: test4:
; HYBRID: # %bb.0:
; HYBRID-NEXT: xorl %eax, %eax
; HYBRID-NEXT: xorl %ecx, %ecx
; HYBRID-NEXT: addq $1, %rsi
; HYBRID-NEXT: setb %cl
; HYBRID-NEXT: setb %al
; HYBRID-NEXT: xorl %ecx, %ecx
; HYBRID-NEXT: cmpq %rdi, %rsi
; HYBRID-NEXT: sbbq $0, %rcx
; HYBRID-NEXT: movl $0, %ecx
; HYBRID-NEXT: sbbq $0, %rcx
; HYBRID-NEXT: sbbq $0, %rax
; HYBRID-NEXT: movl $0, %eax
; HYBRID-NEXT: sbbq $0, %rax
; HYBRID-NEXT: sbbq $0, %rcx
; HYBRID-NEXT: setae %al
; HYBRID-NEXT: movzbl %al, %ecx
; HYBRID-NEXT: movl $2, %eax
@@ -943,14 +939,14 @@ define i64 @test4(i64 %a, i64 %b) nounwind {
; BURR-LABEL: test4:
; BURR: # %bb.0:
; BURR-NEXT: xorl %eax, %eax
; BURR-NEXT: xorl %ecx, %ecx
; BURR-NEXT: addq $1, %rsi
; BURR-NEXT: setb %cl
; BURR-NEXT: setb %al
; BURR-NEXT: xorl %ecx, %ecx
; BURR-NEXT: cmpq %rdi, %rsi
; BURR-NEXT: sbbq $0, %rcx
; BURR-NEXT: movl $0, %ecx
; BURR-NEXT: sbbq $0, %rcx
; BURR-NEXT: sbbq $0, %rax
; BURR-NEXT: movl $0, %eax
; BURR-NEXT: sbbq $0, %rax
; BURR-NEXT: sbbq $0, %rcx
; BURR-NEXT: setae %al
; BURR-NEXT: movzbl %al, %ecx
; BURR-NEXT: movl $2, %eax

@@ -0,0 +1,75 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
; This test checks that we use "movq $0, (%rsp)" to spill a 0 to the stack. It
; was reduced from a larger function.
; CHECK: movq $0, (%rsp) # 8-byte Folded Spill
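;
; A hedged sketch of what the folded spill buys here (register and stack
; offset are illustrative only, not taken from this test's actual output):
; without folding, a spilled zero would typically be materialized and then
; stored, e.g.
;   xorl %eax, %eax
;   movq %rax, (%rsp)            # 8-byte Spill
; whereas folding the zero into the store yields the immediate form checked
; above:
;   movq $0, (%rsp)              # 8-byte Folded Spill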

%struct.foo = type { i8*, i32 }

declare void @pluto()

define void @spam() {
bb:
  br label %bb13

bb1:                                              ; preds = %bb18
  call void @pluto()
  %tmp = getelementptr inbounds %struct.foo, %struct.foo* %tmp20, i64 0, i32 1
  %tmp2 = bitcast i32* %tmp to %struct.foo**
  store %struct.foo* null, %struct.foo** %tmp2
  unreachable

bb3:                                              ; preds = %bb18
  call void @pluto()
  store i8* %tmp22, i8** undef
  unreachable

bb4:                                              ; preds = %bb18
  call void @pluto()
  br label %bb13

bb5:                                              ; preds = %bb18
  %tmp7 = add nsw i32 %tmp23, 1
  store i8* %tmp22, i8** undef
  unreachable

bb8:                                              ; preds = %bb18
  store %struct.foo* %tmp14, %struct.foo** undef
  unreachable

bb9:                                              ; preds = %bb18
  %tmp10 = load %struct.foo*, %struct.foo** undef
  br label %bb13

bb13:                                             ; preds = %bb18, %bb9, %bb4, %bb
  %tmp14 = phi %struct.foo* [ %tmp14, %bb18 ], [ %tmp14, %bb4 ], [ null, %bb ], [ %tmp10, %bb9 ]
  %tmp15 = phi %struct.foo* [ %tmp26, %bb18 ], [ %tmp26, %bb4 ], [ null, %bb ], [ %tmp26, %bb9 ]
  %tmp16 = phi i32 [ %tmp23, %bb18 ], [ %tmp23, %bb4 ], [ 0, %bb ], [ %tmp23, %bb9 ]
  br label %bb17

bb17:                                             ; preds = %bb13
  br i1 false, label %bb27, label %bb18

bb18:                                             ; preds = %bb17
  %tmp19 = load %struct.foo*, %struct.foo** undef
  %tmp20 = getelementptr inbounds %struct.foo, %struct.foo* %tmp19, i64 0
  %tmp21 = getelementptr inbounds %struct.foo, %struct.foo* %tmp20, i64 0, i32 0
  %tmp22 = load i8*, i8** %tmp21
  %tmp23 = add nsw i32 %tmp16, -1
  %tmp24 = getelementptr inbounds %struct.foo, %struct.foo* %tmp15, i64 0, i32 1
  %tmp25 = bitcast i32* %tmp24 to %struct.foo**
  %tmp26 = load %struct.foo*, %struct.foo** %tmp25
  switch i32 undef, label %bb9 [
    i32 1, label %bb1
    i32 2, label %bb3
    i32 3, label %bb4
    i32 4, label %bb5
    i32 5, label %bb13
    i32 6, label %bb8
  ]

bb27:                                             ; preds = %bb17
  ret void
}

@@ -41,8 +41,7 @@ define float @caller(i8* %error_ref) {
; CHECK-APPLE: callq {{.*}}free
; CHECK-O0-LABEL: caller:
; CHECK-O0: xorl
; CHECK-O0: movl %{{.*}}, %r12d
; CHECK-O0: xorl %r12d, %r12d
; CHECK-O0: callq {{.*}}foo
; CHECK-O0: jne
entry:
@@ -78,8 +77,7 @@ define float @caller2(i8* %error_ref) {
; CHECK-APPLE: callq {{.*}}free
; CHECK-O0-LABEL: caller2:
; CHECK-O0: xorl
; CHECK-O0: movl %{{.*}}, %r12d
; CHECK-O0: xorl %r12d, %r12d
; CHECK-O0: callq {{.*}}foo
; CHECK-O0: movq %r12, [[ID:%[a-z]+]]
; CHECK-O0: cmpq $0, %r12
@@ -254,8 +252,7 @@ define float @caller3(i8* %error_ref) {
; CHECK-APPLE: callq {{.*}}free
; CHECK-O0-LABEL: caller3:
; CHECK-O0: xorl
; CHECK-O0: movl {{.*}}, %r12d
; CHECK-O0: xorl %r12d, %r12d
; CHECK-O0: movl $1, %esi
; CHECK-O0: movq {{.*}}, %rdi
; CHECK-O0: callq {{.*}}foo_sret
@@ -313,14 +310,12 @@ define float @caller_with_multiple_swifterror_values(i8* %error_ref, i8* %error_
; CHECK-O0-LABEL: caller_with_multiple_swifterror_values:
; The first swifterror value:
; CHECK-O0: xorl
; CHECK-O0: movl %{{.*}}, %r12d
; CHECK-O0: xorl %r12d, %r12d
; CHECK-O0: callq {{.*}}foo
; CHECK-O0: jne
; The second swifterror value:
; CHECK-O0: xorl
; CHECK-O0: movl %{{.*}}, %r12d
; CHECK-O0: xorl %r12d, %r12d
; CHECK-O0: callq {{.*}}foo
; CHECK-O0: jne
entry:
@@ -715,8 +710,7 @@ declare swiftcc void @foo2(%swift_error** swifterror)
; Make sure we properly assign registers during fast-isel.
; CHECK-O0-LABEL: testAssign
; CHECK-O0: pushq %r12
; CHECK-O0: xorl [[ZERO:%[a-z0-9]+]], [[ZERO]]
; CHECK-O0: movl [[ZERO]], %r12d
; CHECK-O0: xorl %r12d, %r12d
; CHECK-O0: callq _foo2
; CHECK-O0: movq %r12, [[SLOT:[-a-z0-9\(\)\%]*]]
;
@@ -792,8 +786,7 @@ a:
; CHECK-O0-LABEL: testAssign4
; CHECK-O0: callq _foo2
; CHECK-O0: xorl %ecx, %ecx
; CHECK-O0: movl %ecx, %eax
; CHECK-O0: xorl %eax, %eax
; CHECK-O0: movq %rax, [[SLOT:[-a-z0-9\(\)\%]*]]
; CHECK-O0: movq [[SLOT]], %rax
; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]]